summaryrefslogtreecommitdiff
path: root/deps/rabbit/src
diff options
context:
space:
mode:
Diffstat (limited to 'deps/rabbit/src')
-rw-r--r--deps/rabbit/src/amqqueue.erl762
-rw-r--r--deps/rabbit/src/amqqueue_v1.erl584
-rw-r--r--deps/rabbit/src/background_gc.erl78
-rw-r--r--deps/rabbit/src/code_server_cache.erl81
-rw-r--r--deps/rabbit/src/gatherer.erl151
-rw-r--r--deps/rabbit/src/gm.erl1650
-rw-r--r--deps/rabbit/src/internal_user.erl216
-rw-r--r--deps/rabbit/src/internal_user_v1.erl151
-rw-r--r--deps/rabbit/src/lager_exchange_backend.erl233
-rw-r--r--deps/rabbit/src/lqueue.erl102
-rw-r--r--deps/rabbit/src/mirrored_supervisor_sups.erl34
-rw-r--r--deps/rabbit/src/pg_local.erl249
-rw-r--r--deps/rabbit/src/rabbit.erl1511
-rw-r--r--deps/rabbit/src/rabbit_access_control.erl257
-rw-r--r--deps/rabbit/src/rabbit_alarm.erl365
-rw-r--r--deps/rabbit/src/rabbit_amqqueue.erl1889
-rw-r--r--deps/rabbit/src/rabbit_amqqueue_process.erl1849
-rw-r--r--deps/rabbit/src/rabbit_amqqueue_sup.erl35
-rw-r--r--deps/rabbit/src/rabbit_amqqueue_sup_sup.erl84
-rw-r--r--deps/rabbit/src/rabbit_auth_backend_internal.erl1076
-rw-r--r--deps/rabbit/src/rabbit_auth_mechanism_amqplain.erl54
-rw-r--r--deps/rabbit/src/rabbit_auth_mechanism_cr_demo.erl48
-rw-r--r--deps/rabbit/src/rabbit_auth_mechanism_plain.erl60
-rw-r--r--deps/rabbit/src/rabbit_autoheal.erl456
-rw-r--r--deps/rabbit/src/rabbit_backing_queue.erl264
-rw-r--r--deps/rabbit/src/rabbit_basic.erl354
-rw-r--r--deps/rabbit/src/rabbit_binding.erl691
-rw-r--r--deps/rabbit/src/rabbit_boot_steps.erl91
-rw-r--r--deps/rabbit/src/rabbit_channel.erl2797
-rw-r--r--deps/rabbit/src/rabbit_channel_interceptor.erl104
-rw-r--r--deps/rabbit/src/rabbit_channel_sup.erl92
-rw-r--r--deps/rabbit/src/rabbit_channel_sup_sup.erl42
-rw-r--r--deps/rabbit/src/rabbit_channel_tracking.erl291
-rw-r--r--deps/rabbit/src/rabbit_channel_tracking_handler.erl71
-rw-r--r--deps/rabbit/src/rabbit_classic_queue.erl527
-rw-r--r--deps/rabbit/src/rabbit_client_sup.erl43
-rw-r--r--deps/rabbit/src/rabbit_config.erl46
-rw-r--r--deps/rabbit/src/rabbit_confirms.erl152
-rw-r--r--deps/rabbit/src/rabbit_connection_helper_sup.erl57
-rw-r--r--deps/rabbit/src/rabbit_connection_sup.erl66
-rw-r--r--deps/rabbit/src/rabbit_connection_tracking.erl515
-rw-r--r--deps/rabbit/src/rabbit_connection_tracking_handler.erl80
-rw-r--r--deps/rabbit/src/rabbit_control_pbe.erl82
-rw-r--r--deps/rabbit/src/rabbit_core_ff.erl179
-rw-r--r--deps/rabbit/src/rabbit_core_metrics_gc.erl199
-rw-r--r--deps/rabbit/src/rabbit_credential_validation.erl44
-rw-r--r--deps/rabbit/src/rabbit_credential_validator.erl19
-rw-r--r--deps/rabbit/src/rabbit_credential_validator_accept_everything.erl23
-rw-r--r--deps/rabbit/src/rabbit_credential_validator_min_password_length.erl50
-rw-r--r--deps/rabbit/src/rabbit_credential_validator_password_regexp.erl42
-rw-r--r--deps/rabbit/src/rabbit_dead_letter.erl253
-rw-r--r--deps/rabbit/src/rabbit_definitions.erl767
-rw-r--r--deps/rabbit/src/rabbit_diagnostics.erl119
-rw-r--r--deps/rabbit/src/rabbit_direct.erl235
-rw-r--r--deps/rabbit/src/rabbit_disk_monitor.erl317
-rw-r--r--deps/rabbit/src/rabbit_epmd_monitor.erl104
-rw-r--r--deps/rabbit/src/rabbit_event_consumer.erl197
-rw-r--r--deps/rabbit/src/rabbit_exchange.erl592
-rw-r--r--deps/rabbit/src/rabbit_exchange_decorator.erl105
-rw-r--r--deps/rabbit/src/rabbit_exchange_parameters.erl39
-rw-r--r--deps/rabbit/src/rabbit_exchange_type_direct.erl46
-rw-r--r--deps/rabbit/src/rabbit_exchange_type_fanout.erl45
-rw-r--r--deps/rabbit/src/rabbit_exchange_type_headers.erl136
-rw-r--r--deps/rabbit/src/rabbit_exchange_type_invalid.erl45
-rw-r--r--deps/rabbit/src/rabbit_exchange_type_topic.erl266
-rw-r--r--deps/rabbit/src/rabbit_feature_flags.erl2470
-rw-r--r--deps/rabbit/src/rabbit_ff_extra.erl244
-rw-r--r--deps/rabbit/src/rabbit_ff_registry.erl189
-rw-r--r--deps/rabbit/src/rabbit_fhc_helpers.erl45
-rw-r--r--deps/rabbit/src/rabbit_fifo.erl2124
-rw-r--r--deps/rabbit/src/rabbit_fifo.hrl210
-rw-r--r--deps/rabbit/src/rabbit_fifo_client.erl888
-rw-r--r--deps/rabbit/src/rabbit_fifo_index.erl119
-rw-r--r--deps/rabbit/src/rabbit_fifo_v0.erl1961
-rw-r--r--deps/rabbit/src/rabbit_fifo_v0.hrl195
-rw-r--r--deps/rabbit/src/rabbit_file.erl321
-rw-r--r--deps/rabbit/src/rabbit_framing.erl36
-rw-r--r--deps/rabbit/src/rabbit_guid.erl181
-rw-r--r--deps/rabbit/src/rabbit_health_check.erl80
-rw-r--r--deps/rabbit/src/rabbit_lager.erl723
-rw-r--r--deps/rabbit/src/rabbit_limiter.erl448
-rw-r--r--deps/rabbit/src/rabbit_log_tail.erl102
-rw-r--r--deps/rabbit/src/rabbit_looking_glass.erl48
-rw-r--r--deps/rabbit/src/rabbit_maintenance.erl354
-rw-r--r--deps/rabbit/src/rabbit_memory_monitor.erl259
-rw-r--r--deps/rabbit/src/rabbit_metrics.erl45
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_coordinator.erl460
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_master.erl578
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_misc.erl680
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_mode.erl42
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_mode_all.erl32
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl45
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl69
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_slave.erl1093
-rw-r--r--deps/rabbit/src/rabbit_mirror_queue_sync.erl420
-rw-r--r--deps/rabbit/src/rabbit_mnesia.erl1117
-rw-r--r--deps/rabbit/src/rabbit_mnesia_rename.erl276
-rw-r--r--deps/rabbit/src/rabbit_msg_file.erl114
-rw-r--r--deps/rabbit/src/rabbit_msg_record.erl400
-rw-r--r--deps/rabbit/src/rabbit_msg_store.erl2245
-rw-r--r--deps/rabbit/src/rabbit_msg_store_ets_index.erl76
-rw-r--r--deps/rabbit/src/rabbit_msg_store_gc.erl125
-rw-r--r--deps/rabbit/src/rabbit_networking.erl663
-rw-r--r--deps/rabbit/src/rabbit_node_monitor.erl926
-rw-r--r--deps/rabbit/src/rabbit_nodes.erl157
-rw-r--r--deps/rabbit/src/rabbit_osiris_metrics.erl103
-rw-r--r--deps/rabbit/src/rabbit_parameter_validation.erl88
-rw-r--r--deps/rabbit/src/rabbit_password.erl52
-rw-r--r--deps/rabbit/src/rabbit_password_hashing_md5.erl19
-rw-r--r--deps/rabbit/src/rabbit_password_hashing_sha256.erl15
-rw-r--r--deps/rabbit/src/rabbit_password_hashing_sha512.erl15
-rw-r--r--deps/rabbit/src/rabbit_peer_discovery.erl326
-rw-r--r--deps/rabbit/src/rabbit_peer_discovery_classic_config.erl75
-rw-r--r--deps/rabbit/src/rabbit_peer_discovery_dns.erl113
-rw-r--r--deps/rabbit/src/rabbit_plugins.erl699
-rw-r--r--deps/rabbit/src/rabbit_policies.erl179
-rw-r--r--deps/rabbit/src/rabbit_policy.erl557
-rw-r--r--deps/rabbit/src/rabbit_policy_merge_strategy.erl19
-rw-r--r--deps/rabbit/src/rabbit_prelaunch_cluster.erl22
-rw-r--r--deps/rabbit/src/rabbit_prelaunch_enabled_plugins_file.erl53
-rw-r--r--deps/rabbit/src/rabbit_prelaunch_feature_flags.erl32
-rw-r--r--deps/rabbit/src/rabbit_prelaunch_logging.erl75
-rw-r--r--deps/rabbit/src/rabbit_prequeue.erl100
-rw-r--r--deps/rabbit/src/rabbit_priority_queue.erl688
-rw-r--r--deps/rabbit/src/rabbit_queue_consumers.erl568
-rw-r--r--deps/rabbit/src/rabbit_queue_decorator.erl72
-rw-r--r--deps/rabbit/src/rabbit_queue_index.erl1521
-rw-r--r--deps/rabbit/src/rabbit_queue_location_client_local.erl39
-rw-r--r--deps/rabbit/src/rabbit_queue_location_min_masters.erl70
-rw-r--r--deps/rabbit/src/rabbit_queue_location_random.erl42
-rw-r--r--deps/rabbit/src/rabbit_queue_location_validator.erl67
-rw-r--r--deps/rabbit/src/rabbit_queue_master_location_misc.erl108
-rw-r--r--deps/rabbit/src/rabbit_queue_master_locator.erl19
-rw-r--r--deps/rabbit/src/rabbit_queue_type.erl581
-rw-r--r--deps/rabbit/src/rabbit_queue_type_util.erl74
-rw-r--r--deps/rabbit/src/rabbit_quorum_memory_manager.erl67
-rw-r--r--deps/rabbit/src/rabbit_quorum_queue.erl1523
-rw-r--r--deps/rabbit/src/rabbit_ra_registry.erl25
-rw-r--r--deps/rabbit/src/rabbit_reader.erl1803
-rw-r--r--deps/rabbit/src/rabbit_recovery_terms.erl240
-rw-r--r--deps/rabbit/src/rabbit_restartable_sup.erl33
-rw-r--r--deps/rabbit/src/rabbit_router.erl65
-rw-r--r--deps/rabbit/src/rabbit_runtime_parameters.erl412
-rw-r--r--deps/rabbit/src/rabbit_ssl.erl195
-rw-r--r--deps/rabbit/src/rabbit_stream_coordinator.erl949
-rw-r--r--deps/rabbit/src/rabbit_stream_queue.erl734
-rw-r--r--deps/rabbit/src/rabbit_sup.erl109
-rw-r--r--deps/rabbit/src/rabbit_sysmon_handler.erl235
-rw-r--r--deps/rabbit/src/rabbit_sysmon_minder.erl156
-rw-r--r--deps/rabbit/src/rabbit_table.erl416
-rw-r--r--deps/rabbit/src/rabbit_trace.erl128
-rw-r--r--deps/rabbit/src/rabbit_tracking.erl103
-rw-r--r--deps/rabbit/src/rabbit_upgrade.erl314
-rw-r--r--deps/rabbit/src/rabbit_upgrade_functions.erl662
-rw-r--r--deps/rabbit/src/rabbit_upgrade_preparation.erl51
-rw-r--r--deps/rabbit/src/rabbit_variable_queue.erl3015
-rw-r--r--deps/rabbit/src/rabbit_version.erl227
-rw-r--r--deps/rabbit/src/rabbit_vhost.erl422
-rw-r--r--deps/rabbit/src/rabbit_vhost_limit.erl205
-rw-r--r--deps/rabbit/src/rabbit_vhost_msg_store.erl68
-rw-r--r--deps/rabbit/src/rabbit_vhost_process.erl96
-rw-r--r--deps/rabbit/src/rabbit_vhost_sup.erl22
-rw-r--r--deps/rabbit/src/rabbit_vhost_sup_sup.erl271
-rw-r--r--deps/rabbit/src/rabbit_vhost_sup_wrapper.erl57
-rw-r--r--deps/rabbit/src/rabbit_vm.erl427
-rw-r--r--deps/rabbit/src/supervised_lifecycle.erl53
-rw-r--r--deps/rabbit/src/tcp_listener.erl90
-rw-r--r--deps/rabbit/src/tcp_listener_sup.erl54
-rw-r--r--deps/rabbit/src/term_to_binary_compat.erl15
-rw-r--r--deps/rabbit/src/vhost.erl172
-rw-r--r--deps/rabbit/src/vhost_v1.erl106
171 files changed, 62633 insertions, 0 deletions
diff --git a/deps/rabbit/src/amqqueue.erl b/deps/rabbit/src/amqqueue.erl
new file mode 100644
index 0000000000..3415ebd073
--- /dev/null
+++ b/deps/rabbit/src/amqqueue.erl
@@ -0,0 +1,762 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(amqqueue). %% Could become amqqueue_v2 in the future.
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([new/8,
+ new/9,
+ new_with_version/9,
+ new_with_version/10,
+ fields/0,
+ fields/1,
+ field_vhost/0,
+ record_version_to_use/0,
+ upgrade/1,
+ upgrade_to/2,
+ % arguments
+ get_arguments/1,
+ set_arguments/2,
+ % decorators
+ get_decorators/1,
+ set_decorators/2,
+ % exclusive_owner
+ get_exclusive_owner/1,
+ % gm_pids
+ get_gm_pids/1,
+ set_gm_pids/2,
+ get_leader/1,
+ % name (#resource)
+ get_name/1,
+ set_name/2,
+ % operator_policy
+ get_operator_policy/1,
+ set_operator_policy/2,
+ get_options/1,
+ % pid
+ get_pid/1,
+ set_pid/2,
+ % policy
+ get_policy/1,
+ set_policy/2,
+ % policy_version
+ get_policy_version/1,
+ set_policy_version/2,
+ % type_state
+ get_type_state/1,
+ set_type_state/2,
+ % recoverable_slaves
+ get_recoverable_slaves/1,
+ set_recoverable_slaves/2,
+ % slave_pids
+ get_slave_pids/1,
+ set_slave_pids/2,
+ % slave_pids_pending_shutdown
+ get_slave_pids_pending_shutdown/1,
+ set_slave_pids_pending_shutdown/2,
+ % state
+ get_state/1,
+ set_state/2,
+ % sync_slave_pids
+ get_sync_slave_pids/1,
+ set_sync_slave_pids/2,
+ get_type/1,
+ get_vhost/1,
+ is_amqqueue/1,
+ is_auto_delete/1,
+ is_durable/1,
+ is_classic/1,
+ is_quorum/1,
+ pattern_match_all/0,
+ pattern_match_on_name/1,
+ pattern_match_on_type/1,
+ reset_mirroring_and_decorators/1,
+ set_immutable/1,
+ qnode/1,
+ macros/0]).
+
+-define(record_version, amqqueue_v2).
+-define(is_backwards_compat_classic(T),
+ (T =:= classic orelse T =:= ?amqqueue_v1_type)).
+
+-record(amqqueue, {
+ name :: rabbit_amqqueue:name() | '_', %% immutable
+ durable :: boolean() | '_', %% immutable
+ auto_delete :: boolean() | '_', %% immutable
+ exclusive_owner = none :: pid() | none | '_', %% immutable
+ arguments = [] :: rabbit_framing:amqp_table() | '_', %% immutable
+ pid :: pid() | ra_server_id() | none | '_', %% durable (just so we
+ %% know home node)
+ slave_pids = [] :: [pid()] | none | '_', %% transient
+ sync_slave_pids = [] :: [pid()] | none| '_',%% transient
+ recoverable_slaves = [] :: [atom()] | none | '_', %% durable
+ policy :: binary() | none | undefined | '_', %% durable, implicit
+ %% update as above
+ operator_policy :: binary() | none | undefined | '_', %% durable,
+ %% implicit
+ %% update
+ %% as above
+ gm_pids = [] :: [{pid(), pid()}] | none | '_', %% transient
+ decorators :: [atom()] | none | undefined | '_', %% transient,
+ %% recalculated
+ %% as above
+ state = live :: atom() | none | '_', %% durable (have we crashed?)
+ policy_version = 0 :: non_neg_integer() | '_',
+ slave_pids_pending_shutdown = [] :: [pid()] | '_',
+ vhost :: rabbit_types:vhost() | undefined | '_', %% secondary index
+ options = #{} :: map() | '_',
+ type = ?amqqueue_v1_type :: module() | '_',
+ type_state = #{} :: map() | '_'
+ }).
+
+-type amqqueue() :: amqqueue_v1:amqqueue_v1() | amqqueue_v2().
+-type amqqueue_v2() :: #amqqueue{
+ name :: rabbit_amqqueue:name(),
+ durable :: boolean(),
+ auto_delete :: boolean(),
+ exclusive_owner :: pid() | none,
+ arguments :: rabbit_framing:amqp_table(),
+ pid :: pid() | ra_server_id() | none,
+ slave_pids :: [pid()] | none,
+ sync_slave_pids :: [pid()] | none,
+ recoverable_slaves :: [atom()] | none,
+ policy :: binary() | none | undefined,
+ operator_policy :: binary() | none | undefined,
+ gm_pids :: [{pid(), pid()}] | none,
+ decorators :: [atom()] | none | undefined,
+ state :: atom() | none,
+ policy_version :: non_neg_integer(),
+ slave_pids_pending_shutdown :: [pid()],
+ vhost :: rabbit_types:vhost() | undefined,
+ options :: map(),
+ type :: atom(),
+ type_state :: #{}
+ }.
+
+-type ra_server_id() :: {Name :: atom(), Node :: node()}.
+
+-type amqqueue_pattern() :: amqqueue_v1:amqqueue_v1_pattern() |
+ amqqueue_v2_pattern().
+-type amqqueue_v2_pattern() :: #amqqueue{
+ name :: rabbit_amqqueue:name() | '_',
+ durable :: '_',
+ auto_delete :: '_',
+ exclusive_owner :: '_',
+ arguments :: '_',
+ pid :: '_',
+ slave_pids :: '_',
+ sync_slave_pids :: '_',
+ recoverable_slaves :: '_',
+ policy :: '_',
+ operator_policy :: '_',
+ gm_pids :: '_',
+ decorators :: '_',
+ state :: '_',
+ policy_version :: '_',
+ slave_pids_pending_shutdown :: '_',
+ vhost :: '_',
+ options :: '_',
+ type :: atom() | '_',
+ type_state :: '_'
+ }.
+
+-export_type([amqqueue/0,
+ amqqueue_v2/0,
+ amqqueue_pattern/0,
+ amqqueue_v2_pattern/0,
+ ra_server_id/0]).
+
+-spec new(rabbit_amqqueue:name(),
+ pid() | ra_server_id() | none,
+ boolean(),
+ boolean(),
+ pid() | none,
+ rabbit_framing:amqp_table(),
+ rabbit_types:vhost() | undefined,
+ map()) -> amqqueue().
+
+new(#resource{kind = queue} = Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options)
+ when (is_pid(Pid) orelse is_tuple(Pid) orelse Pid =:= none) andalso
+ is_boolean(Durable) andalso
+ is_boolean(AutoDelete) andalso
+ (is_pid(Owner) orelse Owner =:= none) andalso
+ is_list(Args) andalso
+ (is_binary(VHost) orelse VHost =:= undefined) andalso
+ is_map(Options) ->
+ new(Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options,
+ ?amqqueue_v1_type).
+
+-spec new(rabbit_amqqueue:name(),
+ pid() | ra_server_id() | none,
+ boolean(),
+ boolean(),
+ pid() | none,
+ rabbit_framing:amqp_table(),
+ rabbit_types:vhost() | undefined,
+ map(),
+ atom()) -> amqqueue().
+
+new(#resource{kind = queue} = Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options,
+ Type)
+ when (is_pid(Pid) orelse is_tuple(Pid) orelse Pid =:= none) andalso
+ is_boolean(Durable) andalso
+ is_boolean(AutoDelete) andalso
+ (is_pid(Owner) orelse Owner =:= none) andalso
+ is_list(Args) andalso
+ (is_binary(VHost) orelse VHost =:= undefined) andalso
+ is_map(Options) andalso
+ is_atom(Type) ->
+ case record_version_to_use() of
+ ?record_version ->
+ new_with_version(
+ ?record_version,
+ Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options,
+ Type);
+ _ ->
+ amqqueue_v1:new(
+ Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options,
+ Type)
+ end.
+
+-spec new_with_version
+(amqqueue_v1 | amqqueue_v2,
+ rabbit_amqqueue:name(),
+ pid() | ra_server_id() | none,
+ boolean(),
+ boolean(),
+ pid() | none,
+ rabbit_framing:amqp_table(),
+ rabbit_types:vhost() | undefined,
+ map()) -> amqqueue().
+
+new_with_version(RecordVersion,
+ #resource{kind = queue} = Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options)
+ when (is_pid(Pid) orelse is_tuple(Pid) orelse Pid =:= none) andalso
+ is_boolean(Durable) andalso
+ is_boolean(AutoDelete) andalso
+ (is_pid(Owner) orelse Owner =:= none) andalso
+ is_list(Args) andalso
+ (is_binary(VHost) orelse VHost =:= undefined) andalso
+ is_map(Options) ->
+ new_with_version(RecordVersion,
+ Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options,
+ ?amqqueue_v1_type).
+
+-spec new_with_version
+(amqqueue_v1 | amqqueue_v2,
+ rabbit_amqqueue:name(),
+ pid() | ra_server_id() | none,
+ boolean(),
+ boolean(),
+ pid() | none,
+ rabbit_framing:amqp_table(),
+ rabbit_types:vhost() | undefined,
+ map(),
+ atom()) -> amqqueue().
+
+new_with_version(?record_version,
+ #resource{kind = queue} = Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options,
+ Type)
+ when (is_pid(Pid) orelse is_tuple(Pid) orelse Pid =:= none) andalso
+ is_boolean(Durable) andalso
+ is_boolean(AutoDelete) andalso
+ (is_pid(Owner) orelse Owner =:= none) andalso
+ is_list(Args) andalso
+ (is_binary(VHost) orelse VHost =:= undefined) andalso
+ is_map(Options) andalso
+ is_atom(Type) ->
+ #amqqueue{name = Name,
+ durable = Durable,
+ auto_delete = AutoDelete,
+ arguments = Args,
+ exclusive_owner = Owner,
+ pid = Pid,
+ vhost = VHost,
+ options = Options,
+ type = ensure_type_compat(Type)};
+new_with_version(Version,
+ Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options,
+ Type)
+ when ?is_backwards_compat_classic(Type) ->
+ amqqueue_v1:new_with_version(
+ Version,
+ Name,
+ Pid,
+ Durable,
+ AutoDelete,
+ Owner,
+ Args,
+ VHost,
+ Options).
+
+-spec is_amqqueue(any()) -> boolean().
+
+is_amqqueue(#amqqueue{}) -> true;
+is_amqqueue(Queue) -> amqqueue_v1:is_amqqueue(Queue).
+
+-spec record_version_to_use() -> amqqueue_v1 | amqqueue_v2.
+
+record_version_to_use() ->
+ case rabbit_feature_flags:is_enabled(quorum_queue) of
+ true -> ?record_version;
+ false -> amqqueue_v1:record_version_to_use()
+ end.
+
+-spec upgrade(amqqueue()) -> amqqueue().
+
+upgrade(#amqqueue{} = Queue) -> Queue;
+upgrade(OldQueue) -> upgrade_to(record_version_to_use(), OldQueue).
+
+-spec upgrade_to
+(amqqueue_v2, amqqueue()) -> amqqueue_v2();
+(amqqueue_v1, amqqueue_v1:amqqueue_v1()) -> amqqueue_v1:amqqueue_v1().
+
+upgrade_to(?record_version, #amqqueue{} = Queue) ->
+ Queue;
+upgrade_to(?record_version, OldQueue) ->
+ Fields = erlang:tuple_to_list(OldQueue) ++ [?amqqueue_v1_type,
+ undefined],
+ #amqqueue{} = erlang:list_to_tuple(Fields);
+upgrade_to(Version, OldQueue) ->
+ amqqueue_v1:upgrade_to(Version, OldQueue).
+
+% arguments
+
+-spec get_arguments(amqqueue()) -> rabbit_framing:amqp_table().
+
+get_arguments(#amqqueue{arguments = Args}) ->
+ Args;
+get_arguments(Queue) ->
+ amqqueue_v1:get_arguments(Queue).
+
+-spec set_arguments(amqqueue(), rabbit_framing:amqp_table()) -> amqqueue().
+
+set_arguments(#amqqueue{} = Queue, Args) ->
+ Queue#amqqueue{arguments = Args};
+set_arguments(Queue, Args) ->
+ amqqueue_v1:set_arguments(Queue, Args).
+
+% decorators
+
+-spec get_decorators(amqqueue()) -> [atom()] | none | undefined.
+
+get_decorators(#amqqueue{decorators = Decorators}) ->
+ Decorators;
+get_decorators(Queue) ->
+ amqqueue_v1:get_decorators(Queue).
+
+-spec set_decorators(amqqueue(), [atom()] | none | undefined) -> amqqueue().
+
+set_decorators(#amqqueue{} = Queue, Decorators) ->
+ Queue#amqqueue{decorators = Decorators};
+set_decorators(Queue, Decorators) ->
+ amqqueue_v1:set_decorators(Queue, Decorators).
+
+-spec get_exclusive_owner(amqqueue()) -> pid() | none.
+
+get_exclusive_owner(#amqqueue{exclusive_owner = Owner}) ->
+ Owner;
+get_exclusive_owner(Queue) ->
+ amqqueue_v1:get_exclusive_owner(Queue).
+
+% gm_pids
+
+-spec get_gm_pids(amqqueue()) -> [{pid(), pid()}] | none.
+
+get_gm_pids(#amqqueue{gm_pids = GMPids}) ->
+ GMPids;
+get_gm_pids(Queue) ->
+ amqqueue_v1:get_gm_pids(Queue).
+
+-spec set_gm_pids(amqqueue(), [{pid(), pid()}] | none) -> amqqueue().
+
+set_gm_pids(#amqqueue{} = Queue, GMPids) ->
+ Queue#amqqueue{gm_pids = GMPids};
+set_gm_pids(Queue, GMPids) ->
+ amqqueue_v1:set_gm_pids(Queue, GMPids).
+
+-spec get_leader(amqqueue_v2()) -> node().
+
+get_leader(#amqqueue{type = rabbit_quorum_queue, pid = {_, Leader}}) -> Leader.
+
+% operator_policy
+
+-spec get_operator_policy(amqqueue()) -> binary() | none | undefined.
+
+get_operator_policy(#amqqueue{operator_policy = OpPolicy}) -> OpPolicy;
+get_operator_policy(Queue) -> amqqueue_v1:get_operator_policy(Queue).
+
+-spec set_operator_policy(amqqueue(), binary() | none | undefined) ->
+ amqqueue().
+
+set_operator_policy(#amqqueue{} = Queue, Policy) ->
+ Queue#amqqueue{operator_policy = Policy};
+set_operator_policy(Queue, Policy) ->
+ amqqueue_v1:set_operator_policy(Queue, Policy).
+
+% name
+
+-spec get_name(amqqueue()) -> rabbit_amqqueue:name().
+
+get_name(#amqqueue{name = Name}) -> Name;
+get_name(Queue) -> amqqueue_v1:get_name(Queue).
+
+-spec set_name(amqqueue(), rabbit_amqqueue:name()) -> amqqueue().
+
+set_name(#amqqueue{} = Queue, Name) ->
+ Queue#amqqueue{name = Name};
+set_name(Queue, Name) ->
+ amqqueue_v1:set_name(Queue, Name).
+
+-spec get_options(amqqueue()) -> map().
+
+get_options(#amqqueue{options = Options}) -> Options;
+get_options(Queue) -> amqqueue_v1:get_options(Queue).
+
+% pid
+
+-spec get_pid
+(amqqueue_v2()) -> pid() | ra_server_id() | none;
+(amqqueue_v1:amqqueue_v1()) -> pid() | none.
+
+get_pid(#amqqueue{pid = Pid}) -> Pid;
+get_pid(Queue) -> amqqueue_v1:get_pid(Queue).
+
+-spec set_pid
+(amqqueue_v2(), pid() | ra_server_id() | none) -> amqqueue_v2();
+(amqqueue_v1:amqqueue_v1(), pid() | none) -> amqqueue_v1:amqqueue_v1().
+
+set_pid(#amqqueue{} = Queue, Pid) ->
+ Queue#amqqueue{pid = Pid};
+set_pid(Queue, Pid) ->
+ amqqueue_v1:set_pid(Queue, Pid).
+
+% policy
+
+-spec get_policy(amqqueue()) -> proplists:proplist() | none | undefined.
+
+get_policy(#amqqueue{policy = Policy}) -> Policy;
+get_policy(Queue) -> amqqueue_v1:get_policy(Queue).
+
+-spec set_policy(amqqueue(), binary() | none | undefined) -> amqqueue().
+
+set_policy(#amqqueue{} = Queue, Policy) ->
+ Queue#amqqueue{policy = Policy};
+set_policy(Queue, Policy) ->
+ amqqueue_v1:set_policy(Queue, Policy).
+
+% policy_version
+
+-spec get_policy_version(amqqueue()) -> non_neg_integer().
+
+get_policy_version(#amqqueue{policy_version = PV}) ->
+ PV;
+get_policy_version(Queue) ->
+ amqqueue_v1:get_policy_version(Queue).
+
+-spec set_policy_version(amqqueue(), non_neg_integer()) -> amqqueue().
+
+set_policy_version(#amqqueue{} = Queue, PV) ->
+ Queue#amqqueue{policy_version = PV};
+set_policy_version(Queue, PV) ->
+ amqqueue_v1:set_policy_version(Queue, PV).
+
+% recoverable_slaves
+
+-spec get_recoverable_slaves(amqqueue()) -> [atom()] | none.
+
+get_recoverable_slaves(#amqqueue{recoverable_slaves = Slaves}) ->
+ Slaves;
+get_recoverable_slaves(Queue) ->
+ amqqueue_v1:get_recoverable_slaves(Queue).
+
+-spec set_recoverable_slaves(amqqueue(), [atom()] | none) -> amqqueue().
+
+set_recoverable_slaves(#amqqueue{} = Queue, Slaves) ->
+ Queue#amqqueue{recoverable_slaves = Slaves};
+set_recoverable_slaves(Queue, Slaves) ->
+ amqqueue_v1:set_recoverable_slaves(Queue, Slaves).
+
+% type_state (new in v2)
+
+-spec get_type_state(amqqueue()) -> map().
+get_type_state(#amqqueue{type_state = TState}) ->
+ TState;
+get_type_state(_) ->
+ #{}.
+
+-spec set_type_state(amqqueue(), map()) -> amqqueue().
+set_type_state(#amqqueue{} = Queue, TState) ->
+ Queue#amqqueue{type_state = TState};
+set_type_state(Queue, _TState) ->
+ Queue.
+
+% slave_pids
+
+-spec get_slave_pids(amqqueue()) -> [pid()] | none.
+
+get_slave_pids(#amqqueue{slave_pids = Slaves}) ->
+ Slaves;
+get_slave_pids(Queue) ->
+ amqqueue_v1:get_slave_pids(Queue).
+
+-spec set_slave_pids(amqqueue(), [pid()] | none) -> amqqueue().
+
+set_slave_pids(#amqqueue{} = Queue, SlavePids) ->
+ Queue#amqqueue{slave_pids = SlavePids};
+set_slave_pids(Queue, SlavePids) ->
+ amqqueue_v1:set_slave_pids(Queue, SlavePids).
+
+% slave_pids_pending_shutdown
+
+-spec get_slave_pids_pending_shutdown(amqqueue()) -> [pid()].
+
+get_slave_pids_pending_shutdown(
+ #amqqueue{slave_pids_pending_shutdown = Slaves}) ->
+ Slaves;
+get_slave_pids_pending_shutdown(Queue) ->
+ amqqueue_v1:get_slave_pids_pending_shutdown(Queue).
+
+-spec set_slave_pids_pending_shutdown(amqqueue(), [pid()]) -> amqqueue().
+
+set_slave_pids_pending_shutdown(#amqqueue{} = Queue, SlavePids) ->
+ Queue#amqqueue{slave_pids_pending_shutdown = SlavePids};
+set_slave_pids_pending_shutdown(Queue, SlavePids) ->
+ amqqueue_v1:set_slave_pids_pending_shutdown(Queue, SlavePids).
+
+% state
+
+-spec get_state(amqqueue()) -> atom() | none.
+
+get_state(#amqqueue{state = State}) -> State;
+get_state(Queue) -> amqqueue_v1:get_state(Queue).
+
+-spec set_state(amqqueue(), atom() | none) -> amqqueue().
+
+set_state(#amqqueue{} = Queue, State) ->
+ Queue#amqqueue{state = State};
+set_state(Queue, State) ->
+ amqqueue_v1:set_state(Queue, State).
+
+% sync_slave_pids
+
+-spec get_sync_slave_pids(amqqueue()) -> [pid()] | none.
+
+get_sync_slave_pids(#amqqueue{sync_slave_pids = Pids}) ->
+ Pids;
+get_sync_slave_pids(Queue) ->
+ amqqueue_v1:get_sync_slave_pids(Queue).
+
+-spec set_sync_slave_pids(amqqueue(), [pid()] | none) -> amqqueue().
+
+set_sync_slave_pids(#amqqueue{} = Queue, Pids) ->
+ Queue#amqqueue{sync_slave_pids = Pids};
+set_sync_slave_pids(Queue, Pids) ->
+ amqqueue_v1:set_sync_slave_pids(Queue, Pids).
+
+%% New in v2.
+
+-spec get_type(amqqueue()) -> atom().
+
+get_type(#amqqueue{type = Type}) -> Type;
+get_type(Queue) when ?is_amqqueue(Queue) -> ?amqqueue_v1_type.
+
+-spec get_vhost(amqqueue()) -> rabbit_types:vhost() | undefined.
+
+get_vhost(#amqqueue{vhost = VHost}) -> VHost;
+get_vhost(Queue) -> amqqueue_v1:get_vhost(Queue).
+
+-spec is_auto_delete(amqqueue()) -> boolean().
+
+is_auto_delete(#amqqueue{auto_delete = AutoDelete}) ->
+ AutoDelete;
+is_auto_delete(Queue) ->
+ amqqueue_v1:is_auto_delete(Queue).
+
+-spec is_durable(amqqueue()) -> boolean().
+
+is_durable(#amqqueue{durable = Durable}) -> Durable;
+is_durable(Queue) -> amqqueue_v1:is_durable(Queue).
+
+-spec is_classic(amqqueue()) -> boolean().
+
+is_classic(Queue) ->
+ get_type(Queue) =:= ?amqqueue_v1_type.
+
+-spec is_quorum(amqqueue()) -> boolean().
+
+is_quorum(Queue) ->
+ get_type(Queue) =:= rabbit_quorum_queue.
+
+fields() ->
+ case record_version_to_use() of
+ ?record_version -> fields(?record_version);
+ _ -> amqqueue_v1:fields()
+ end.
+
+fields(?record_version) -> record_info(fields, amqqueue);
+fields(Version) -> amqqueue_v1:fields(Version).
+
+field_vhost() ->
+ case record_version_to_use() of
+ ?record_version -> #amqqueue.vhost;
+ _ -> amqqueue_v1:field_vhost()
+ end.
+
+-spec pattern_match_all() -> amqqueue_pattern().
+
+pattern_match_all() ->
+ case record_version_to_use() of
+ ?record_version -> #amqqueue{_ = '_'};
+ _ -> amqqueue_v1:pattern_match_all()
+ end.
+
+-spec pattern_match_on_name(rabbit_amqqueue:name()) -> amqqueue_pattern().
+
+pattern_match_on_name(Name) ->
+ case record_version_to_use() of
+ ?record_version -> #amqqueue{name = Name, _ = '_'};
+ _ -> amqqueue_v1:pattern_match_on_name(Name)
+ end.
+
+-spec pattern_match_on_type(atom()) -> amqqueue_pattern().
+
+pattern_match_on_type(Type) ->
+ case record_version_to_use() of
+ ?record_version ->
+ #amqqueue{type = Type, _ = '_'};
+ _ when ?is_backwards_compat_classic(Type) ->
+ amqqueue_v1:pattern_match_all();
+ %% FIXME: We try a pattern which should never match when the
+ %% `quorum_queue` feature flag is not enabled yet. Is there
+ %% a better solution?
+ _ ->
+ amqqueue_v1:pattern_match_on_name(
+ rabbit_misc:r(<<0>>, queue, <<0>>))
+ end.
+
+-spec reset_mirroring_and_decorators(amqqueue()) -> amqqueue().
+
+reset_mirroring_and_decorators(#amqqueue{} = Queue) ->
+ Queue#amqqueue{slave_pids = [],
+ sync_slave_pids = [],
+ gm_pids = [],
+ decorators = undefined};
+reset_mirroring_and_decorators(Queue) ->
+ amqqueue_v1:reset_mirroring_and_decorators(Queue).
+
+-spec set_immutable(amqqueue()) -> amqqueue().
+
+set_immutable(#amqqueue{} = Queue) ->
+ Queue#amqqueue{pid = none,
+ slave_pids = [],
+ sync_slave_pids = none,
+ recoverable_slaves = none,
+ gm_pids = none,
+ policy = none,
+ decorators = none,
+ state = none};
+set_immutable(Queue) ->
+ amqqueue_v1:set_immutable(Queue).
+
+-spec qnode(amqqueue() | pid() | ra_server_id()) -> node().
+
+qnode(Queue) when ?is_amqqueue(Queue) ->
+ QPid = get_pid(Queue),
+ qnode(QPid);
+qnode(QPid) when is_pid(QPid) ->
+ node(QPid);
+qnode({_, Node}) ->
+ Node.
+
+% private
+
+macros() ->
+ io:format(
+ "-define(is_~s(Q), is_record(Q, amqqueue, ~b)).~n~n",
+ [?record_version, record_info(size, amqqueue)]),
+ %% The field number starts at 2 because the first element is the
+ %% record name.
+ macros(record_info(fields, amqqueue), 2).
+
+macros([Field | Rest], I) ->
+ io:format(
+ "-define(~s_field_~s(Q), element(~b, Q)).~n",
+ [?record_version, Field, I]),
+ macros(Rest, I + 1);
+macros([], _) ->
+ ok.
+
+ensure_type_compat(classic) ->
+ ?amqqueue_v1_type;
+ensure_type_compat(Type) ->
+ Type.
diff --git a/deps/rabbit/src/amqqueue_v1.erl b/deps/rabbit/src/amqqueue_v1.erl
new file mode 100644
index 0000000000..dd1de74a4e
--- /dev/null
+++ b/deps/rabbit/src/amqqueue_v1.erl
@@ -0,0 +1,584 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(amqqueue_v1).
+
+-include_lib("rabbit_common/include/resource.hrl").
+-include("amqqueue.hrl").
+
+-export([new/8,
+ new/9,
+ new_with_version/9,
+ new_with_version/10,
+ fields/0,
+ fields/1,
+ field_vhost/0,
+ record_version_to_use/0,
+ upgrade/1,
+ upgrade_to/2,
+ % arguments
+ get_arguments/1,
+ set_arguments/2,
+ % decorators
+ get_decorators/1,
+ set_decorators/2,
+ % exclusive_owner
+ get_exclusive_owner/1,
+ % gm_pids
+ get_gm_pids/1,
+ set_gm_pids/2,
+ get_leader/1,
+ % name (#resource)
+ get_name/1,
+ set_name/2,
+ % operator_policy
+ get_operator_policy/1,
+ set_operator_policy/2,
+ get_options/1,
+ % pid
+ get_pid/1,
+ set_pid/2,
+ % policy
+ get_policy/1,
+ set_policy/2,
+ % policy_version
+ get_policy_version/1,
+ set_policy_version/2,
+ % type_state
+ get_type_state/1,
+ set_type_state/2,
+ % recoverable_slaves
+ get_recoverable_slaves/1,
+ set_recoverable_slaves/2,
+ % slave_pids
+ get_slave_pids/1,
+ set_slave_pids/2,
+ % slave_pids_pending_shutdown
+ get_slave_pids_pending_shutdown/1,
+ set_slave_pids_pending_shutdown/2,
+ % state
+ get_state/1,
+ set_state/2,
+ % sync_slave_pids
+ get_sync_slave_pids/1,
+ set_sync_slave_pids/2,
+ get_type/1,
+ get_vhost/1,
+ is_amqqueue/1,
+ is_auto_delete/1,
+ is_durable/1,
+ is_classic/1,
+ is_quorum/1,
+ pattern_match_all/0,
+ pattern_match_on_name/1,
+ pattern_match_on_type/1,
+ reset_mirroring_and_decorators/1,
+ set_immutable/1,
+ qnode/1,
+ macros/0]).
+
+%% The version tag for this record flavour is the module name itself.
+-define(record_version, ?MODULE).
+%% `classic' (the modern type name) and the legacy v1 type atom are
+%% treated as equivalent for backwards compatibility.
+-define(is_backwards_compat_classic(T),
+        (T =:= classic orelse T =:= ?amqqueue_v1_type)).
+
+%% Version 1 of the #amqqueue record as historically stored in Mnesia.
+%% The '_' alternative in each field type allows the record to double
+%% as a match pattern (see pattern_match_all/0 and friends).
+-record(amqqueue, {
+          name :: rabbit_amqqueue:name() | '_', %% immutable
+          durable :: boolean() | '_', %% immutable
+          auto_delete :: boolean() | '_', %% immutable
+          exclusive_owner = none :: pid() | none | '_', %% immutable
+          arguments = [] :: rabbit_framing:amqp_table() | '_', %% immutable
+          pid :: pid() | none | '_', %% durable (just so we
+                                     %% know home node)
+          slave_pids = [] :: [pid()] | none | '_', %% transient
+          sync_slave_pids = [] :: [pid()] | none| '_',%% transient
+          recoverable_slaves = [] :: [atom()] | none | '_', %% durable
+          policy :: binary() | none | undefined | '_', %% durable, implicit
+                                                       %% update as above
+          operator_policy :: binary() | none | undefined | '_', %% durable,
+                                                                %% implicit
+                                                                %% update
+                                                                %% as above
+          gm_pids = [] :: [{pid(), pid()}] | none | '_', %% transient
+          decorators :: [atom()] | none | undefined | '_', %% transient,
+                                                           %% recalculated
+                                                           %% as above
+          state = live :: atom() | none | '_', %% durable (have we crashed?)
+          policy_version = 0 :: non_neg_integer() | '_',
+          slave_pids_pending_shutdown = [] :: [pid()] | '_',
+          vhost :: rabbit_types:vhost() | undefined | '_', %% secondary index
+          options = #{} :: map() | '_'
+         }).
+
+%% Concrete (non-pattern) view of the record: every field carries a
+%% real value, never the '_' wildcard.
+-type amqqueue() :: amqqueue_v1().
+-type amqqueue_v1() :: #amqqueue{
+                          name :: rabbit_amqqueue:name(),
+                          durable :: boolean(),
+                          auto_delete :: boolean(),
+                          exclusive_owner :: pid() | none,
+                          arguments :: rabbit_framing:amqp_table(),
+                          pid :: pid() | none,
+                          slave_pids :: [pid()] | none,
+                          sync_slave_pids :: [pid()] | none,
+                          recoverable_slaves :: [atom()] | none,
+                          policy :: binary() | none | undefined,
+                          operator_policy :: binary() | none | undefined,
+                          gm_pids :: [{pid(), pid()}] | none,
+                          decorators :: [atom()] | none | undefined,
+                          state :: atom() | none,
+                          policy_version :: non_neg_integer(),
+                          slave_pids_pending_shutdown :: [pid()],
+                          vhost :: rabbit_types:vhost() | undefined,
+                          options :: map()
+                         }.
+
+%% Pattern view of the record, used for Mnesia/ETS match
+%% specifications: only the name may be constrained, everything else
+%% is the '_' wildcard.
+-type amqqueue_pattern() :: amqqueue_v1_pattern().
+-type amqqueue_v1_pattern() :: #amqqueue{
+                                  name :: rabbit_amqqueue:name() | '_',
+                                  durable :: '_',
+                                  auto_delete :: '_',
+                                  exclusive_owner :: '_',
+                                  arguments :: '_',
+                                  pid :: '_',
+                                  slave_pids :: '_',
+                                  sync_slave_pids :: '_',
+                                  recoverable_slaves :: '_',
+                                  policy :: '_',
+                                  operator_policy :: '_',
+                                  gm_pids :: '_',
+                                  decorators :: '_',
+                                  state :: '_',
+                                  policy_version :: '_',
+                                  slave_pids_pending_shutdown :: '_',
+                                  vhost :: '_',
+                                  options :: '_'
+                                 }.
+
+-export_type([amqqueue/0,
+              amqqueue_v1/0,
+              amqqueue_pattern/0,
+              amqqueue_v1_pattern/0]).
+
+-spec new(rabbit_amqqueue:name(),
+          pid() | none,
+          boolean(),
+          boolean(),
+          pid() | none,
+          rabbit_framing:amqp_table(),
+          rabbit_types:vhost() | undefined,
+          map()) -> amqqueue().
+
+%% Creates a v1 #amqqueue record. Every argument is validated in the
+%% guard (offensively: a bad argument raises function_clause) before
+%% delegating to new_with_version/9 with this module's version tag.
+new(#resource{kind = queue} = Name,
+    Pid,
+    Durable,
+    AutoDelete,
+    Owner,
+    Args,
+    VHost,
+    Options)
+  when (is_pid(Pid) orelse Pid =:= none) andalso
+       is_boolean(Durable) andalso
+       is_boolean(AutoDelete) andalso
+       (is_pid(Owner) orelse Owner =:= none) andalso
+       is_list(Args) andalso
+       (is_binary(VHost) orelse VHost =:= undefined) andalso
+       is_map(Options) ->
+    new_with_version(
+      ?record_version,
+      Name,
+      Pid,
+      Durable,
+      AutoDelete,
+      Owner,
+      Args,
+      VHost,
+      Options).
+
+-spec new(rabbit_amqqueue:name(),
+          pid() | none,
+          boolean(),
+          boolean(),
+          pid() | none,
+          rabbit_framing:amqp_table(),
+          rabbit_types:vhost() | undefined,
+          map(),
+          ?amqqueue_v1_type | classic) -> amqqueue().
+
+%% Like new/8 but additionally accepts a queue type. A v1 record can
+%% only represent the classic type (or its legacy alias), which the
+%% guard enforces; the Type argument is then dropped.
+new(#resource{kind = queue} = Name,
+    Pid,
+    Durable,
+    AutoDelete,
+    Owner,
+    Args,
+    VHost,
+    Options,
+    Type)
+  when (is_pid(Pid) orelse Pid =:= none) andalso
+       is_boolean(Durable) andalso
+       is_boolean(AutoDelete) andalso
+       (is_pid(Owner) orelse Owner =:= none) andalso
+       is_list(Args) andalso
+       (is_binary(VHost) orelse VHost =:= undefined) andalso
+       is_map(Options) andalso
+       ?is_backwards_compat_classic(Type) ->
+    new(
+      Name,
+      Pid,
+      Durable,
+      AutoDelete,
+      Owner,
+      Args,
+      VHost,
+      Options).
+
+-spec new_with_version(amqqueue_v1,
+                       rabbit_amqqueue:name(),
+                       pid() | none,
+                       boolean(),
+                       boolean(),
+                       pid() | none,
+                       rabbit_framing:amqp_table(),
+                       rabbit_types:vhost() | undefined,
+                       map()) -> amqqueue().
+
+%% Builds the actual record. Only this module's version tag is
+%% accepted; arguments are validated in the guard as in new/8.
+%% Fields not listed here keep their record defaults (e.g. state =
+%% live, policy_version = 0).
+new_with_version(?record_version,
+                 #resource{kind = queue} = Name,
+                 Pid,
+                 Durable,
+                 AutoDelete,
+                 Owner,
+                 Args,
+                 VHost,
+                 Options)
+  when (is_pid(Pid) orelse Pid =:= none) andalso
+       is_boolean(Durable) andalso
+       is_boolean(AutoDelete) andalso
+       (is_pid(Owner) orelse Owner =:= none) andalso
+       is_list(Args) andalso
+       (is_binary(VHost) orelse VHost =:= undefined) andalso
+       is_map(Options) ->
+    #amqqueue{name = Name,
+              durable = Durable,
+              auto_delete = AutoDelete,
+              arguments = Args,
+              exclusive_owner = Owner,
+              pid = Pid,
+              vhost = VHost,
+              options = Options}.
+
+-spec new_with_version(amqqueue_v1,
+                       rabbit_amqqueue:name(),
+                       pid() | none,
+                       boolean(),
+                       boolean(),
+                       pid() | none,
+                       rabbit_framing:amqp_table(),
+                       rabbit_types:vhost() | undefined,
+                       map(),
+                       ?amqqueue_v1_type | classic) -> amqqueue().
+
+%% Like new_with_version/9 but additionally accepts a queue type,
+%% which must be classic (or its legacy alias) for a v1 record; the
+%% Type argument is validated and then dropped.
+new_with_version(?record_version,
+                 #resource{kind = queue} = Name,
+                 Pid,
+                 Durable,
+                 AutoDelete,
+                 Owner,
+                 Args,
+                 VHost,
+                 Options,
+                 Type)
+  when (is_pid(Pid) orelse Pid =:= none) andalso
+       is_boolean(Durable) andalso
+       is_boolean(AutoDelete) andalso
+       (is_pid(Owner) orelse Owner =:= none) andalso
+       is_list(Args) andalso
+       (is_binary(VHost) orelse VHost =:= undefined) andalso
+       is_map(Options) andalso
+       ?is_backwards_compat_classic(Type) ->
+    new_with_version(
+      ?record_version,
+      Name,
+      Pid,
+      Durable,
+      AutoDelete,
+      Owner,
+      Args,
+      VHost,
+      Options).
+
+-spec is_amqqueue(any()) -> boolean().
+
+%% True iff the term is a v1 #amqqueue record.
+is_amqqueue(#amqqueue{}) -> true;
+is_amqqueue(_) -> false.
+
+-spec record_version_to_use() -> amqqueue_v1.
+
+%% This module always produces v1 records; the version tag is the
+%% module name.
+record_version_to_use() ->
+    ?record_version.
+
+-spec upgrade(amqqueue()) -> amqqueue().
+
+%% v1 is the base record version, so upgrading is the identity.
+upgrade(#amqqueue{} = Queue) -> Queue.
+
+-spec upgrade_to(amqqueue_v1, amqqueue()) -> amqqueue().
+
+%% Only "upgrading" to the same (v1) version is supported here; any
+%% other target version fails to match.
+upgrade_to(?record_version, #amqqueue{} = Queue) ->
+    Queue.
+
+% arguments
+
+-spec get_arguments(amqqueue()) -> rabbit_framing:amqp_table().
+
+get_arguments(#amqqueue{arguments = Args}) -> Args.
+
+-spec set_arguments(amqqueue(), rabbit_framing:amqp_table()) -> amqqueue().
+
+set_arguments(#amqqueue{} = Queue, Args) ->
+    Queue#amqqueue{arguments = Args}.
+
+% decorators
+
+-spec get_decorators(amqqueue()) -> [atom()] | none | undefined.
+
+get_decorators(#amqqueue{decorators = Decorators}) -> Decorators.
+
+-spec set_decorators(amqqueue(), [atom()] | none | undefined) -> amqqueue().
+
+set_decorators(#amqqueue{} = Queue, Decorators) ->
+    Queue#amqqueue{decorators = Decorators}.
+
+-spec get_exclusive_owner(amqqueue()) -> pid() | none.
+
+get_exclusive_owner(#amqqueue{exclusive_owner = Owner}) -> Owner.
+
+% gm_pids
+
+-spec get_gm_pids(amqqueue()) -> [{pid(), pid()}] | none.
+
+get_gm_pids(#amqqueue{gm_pids = GMPids}) -> GMPids.
+
+-spec set_gm_pids(amqqueue(), [{pid(), pid()}] | none) -> amqqueue().
+
+set_gm_pids(#amqqueue{} = Queue, GMPids) ->
+    Queue#amqqueue{gm_pids = GMPids}.
+
+-spec get_leader(amqqueue_v1()) -> no_return().
+
+%% Leaders only exist for quorum queues, which a v1 record cannot
+%% represent; callers must not reach this on v1 records.
+get_leader(_) -> throw({unsupported, ?record_version, get_leader}).
+
+% operator_policy
+
+-spec get_operator_policy(amqqueue()) -> binary() | none | undefined.
+
+get_operator_policy(#amqqueue{operator_policy = OpPolicy}) -> OpPolicy.
+
+-spec set_operator_policy(amqqueue(), binary() | none | undefined) ->
+          amqqueue().
+
+set_operator_policy(#amqqueue{} = Queue, OpPolicy) ->
+    Queue#amqqueue{operator_policy = OpPolicy}.
+
+% name
+
+-spec get_name(amqqueue()) -> rabbit_amqqueue:name().
+
+get_name(#amqqueue{name = Name}) -> Name.
+
+-spec set_name(amqqueue(), rabbit_amqqueue:name()) -> amqqueue().
+
+set_name(#amqqueue{} = Queue, Name) ->
+    Queue#amqqueue{name = Name}.
+
+-spec get_options(amqqueue()) -> map().
+
+get_options(#amqqueue{options = Options}) -> Options.
+
+% pid
+
+-spec get_pid
+(amqqueue_v1:amqqueue_v1()) -> pid() | none.
+
+get_pid(#amqqueue{pid = Pid}) -> Pid.
+
+-spec set_pid
+(amqqueue_v1:amqqueue_v1(), pid() | none) -> amqqueue_v1:amqqueue_v1().
+
+set_pid(#amqqueue{} = Queue, Pid) ->
+    Queue#amqqueue{pid = Pid}.
+
+% policy
+
+-spec get_policy(amqqueue()) -> proplists:proplist() | none | undefined.
+
+get_policy(#amqqueue{policy = Policy}) -> Policy.
+
+-spec set_policy(amqqueue(), binary() | none | undefined) -> amqqueue().
+
+set_policy(#amqqueue{} = Queue, Policy) ->
+    Queue#amqqueue{policy = Policy}.
+
+% policy_version
+
+-spec get_policy_version(amqqueue()) -> non_neg_integer().
+
+get_policy_version(#amqqueue{policy_version = PV}) ->
+    PV.
+
+-spec set_policy_version(amqqueue(), non_neg_integer()) -> amqqueue().
+
+set_policy_version(#amqqueue{} = Queue, PV) ->
+    Queue#amqqueue{policy_version = PV}.
+
+% recoverable_slaves
+
+-spec get_recoverable_slaves(amqqueue()) -> [atom()] | none.
+
+get_recoverable_slaves(#amqqueue{recoverable_slaves = Slaves}) ->
+    Slaves.
+
+-spec set_recoverable_slaves(amqqueue(), [atom()] | none) -> amqqueue().
+
+set_recoverable_slaves(#amqqueue{} = Queue, Slaves) ->
+    Queue#amqqueue{recoverable_slaves = Slaves}.
+
+% type_state (new in v2)
+
+-spec get_type_state(amqqueue()) -> no_return().
+
+%% The type_state field only exists in v2 records; always throws here.
+get_type_state(_) -> throw({unsupported, ?record_version, get_type_state}).
+
+-spec set_type_state(amqqueue(), [node()]) -> no_return().
+
+%% The type_state field only exists in v2 records; always throws here.
+set_type_state(_, _) ->
+    throw({unsupported, ?record_version, set_type_state}).
+
+% slave_pids
+
+get_slave_pids(#amqqueue{slave_pids = Slaves}) ->
+    Slaves.
+
+set_slave_pids(#amqqueue{} = Queue, SlavePids) ->
+    Queue#amqqueue{slave_pids = SlavePids}.
+
+% slave_pids_pending_shutdown
+
+get_slave_pids_pending_shutdown(
+  #amqqueue{slave_pids_pending_shutdown = Slaves}) ->
+    Slaves.
+
+set_slave_pids_pending_shutdown(#amqqueue{} = Queue, SlavePids) ->
+    Queue#amqqueue{slave_pids_pending_shutdown = SlavePids}.
+
+% state
+
+-spec get_state(amqqueue()) -> atom() | none.
+
+get_state(#amqqueue{state = State}) -> State.
+
+-spec set_state(amqqueue(), atom() | none) -> amqqueue().
+
+set_state(#amqqueue{} = Queue, State) ->
+    Queue#amqqueue{state = State}.
+
+% sync_slave_pids
+
+-spec get_sync_slave_pids(amqqueue()) -> [pid()] | none.
+
+get_sync_slave_pids(#amqqueue{sync_slave_pids = Pids}) ->
+    Pids.
+
+-spec set_sync_slave_pids(amqqueue(), [pid()] | none) -> amqqueue().
+
+set_sync_slave_pids(#amqqueue{} = Queue, Pids) ->
+    Queue#amqqueue{sync_slave_pids = Pids}.
+
+%% New in v2.
+
+-spec get_type(amqqueue()) -> atom().
+
+%% A v1 record can only ever hold a classic queue, so the type is
+%% constant.
+get_type(Queue) when ?is_amqqueue(Queue) -> ?amqqueue_v1_type.
+
+-spec get_vhost(amqqueue()) -> rabbit_types:vhost() | undefined.
+
+get_vhost(#amqqueue{vhost = VHost}) -> VHost.
+
+-spec is_auto_delete(amqqueue()) -> boolean().
+
+is_auto_delete(#amqqueue{auto_delete = AutoDelete}) -> AutoDelete.
+
+-spec is_durable(amqqueue()) -> boolean().
+
+is_durable(#amqqueue{durable = Durable}) -> Durable.
+
+-spec is_classic(amqqueue()) -> boolean().
+
+%% Always true for v1 records (get_type/1 is constant here).
+is_classic(Queue) ->
+    get_type(Queue) =:= ?amqqueue_v1_type.
+
+-spec is_quorum(amqqueue()) -> boolean().
+
+%% Always false: quorum queues require the v2 record.
+is_quorum(Queue) when ?is_amqqueue(Queue) ->
+    false.
+
+fields() -> fields(?record_version).
+
+fields(?record_version) -> record_info(fields, amqqueue).
+
+%% Tuple position of the vhost field, used as a secondary Mnesia index.
+field_vhost() -> #amqqueue.vhost.
+
+-spec pattern_match_all() -> amqqueue_pattern().
+
+%% Match pattern that matches every v1 #amqqueue record.
+pattern_match_all() -> #amqqueue{_ = '_'}.
+
+-spec pattern_match_on_name(rabbit_amqqueue:name()) ->
+          amqqueue_pattern().
+
+%% Match pattern constrained only on the queue name.
+pattern_match_on_name(Name) -> #amqqueue{name = Name, _ = '_'}.
+
+-spec pattern_match_on_type(atom()) -> no_return().
+
+%% v1 records carry no type field to match on; always throws.
+pattern_match_on_type(_) ->
+    throw({unsupported, ?record_version, pattern_match_on_type}).
+
+%% Clears the transient mirroring bookkeeping (slave_pids,
+%% sync_slave_pids, gm_pids) and the cached decorators.
+reset_mirroring_and_decorators(#amqqueue{} = Queue) ->
+    Queue#amqqueue{slave_pids = [],
+                   sync_slave_pids = [],
+                   gm_pids = [],
+                   decorators = undefined}.
+
+%% Blanks every runtime/mutable field so only the immutable queue
+%% definition remains.
+%% NOTE(review): slave_pids is reset to none here, while the v2
+%% module's set_immutable/1 resets it to [] — confirm the divergence
+%% between record versions is intentional.
+set_immutable(#amqqueue{} = Queue) ->
+    Queue#amqqueue{pid = none,
+                   slave_pids = none,
+                   sync_slave_pids = none,
+                   recoverable_slaves = none,
+                   gm_pids = none,
+                   policy = none,
+                   decorators = none,
+                   state = none}.
+
+-spec qnode(amqqueue() | pid()) -> node().
+
+%% Node hosting the queue process; v1 queues are always pid-based, so
+%% there is no Ra server id clause here (unlike the v2 module).
+qnode(Queue) when ?is_amqqueue(Queue) ->
+    QPid = get_pid(Queue),
+    qnode(QPid);
+qnode(QPid) when is_pid(QPid) ->
+    node(QPid).
+
+%% Development helper: prints the ?is_<version>/1 record-test macro and
+%% one field-accessor macro per #amqqueue field to stdout, for pasting
+%% into the shared header file.
+macros() ->
+    io:format(
+      "-define(is_~s(Q), is_record(Q, amqqueue, ~b)).~n~n",
+      [?record_version, record_info(size, amqqueue)]),
+    %% The field number starts at 2 because the first element is the
+    %% record name.
+    macros(record_info(fields, amqqueue), 2).
+
+macros([Field | Rest], I) ->
+    io:format(
+      "-define(~s_field_~s(Q), element(~b, Q)).~n",
+      [?record_version, Field, I]),
+    macros(Rest, I + 1);
+macros([], _) ->
+    ok.
diff --git a/deps/rabbit/src/background_gc.erl b/deps/rabbit/src/background_gc.erl
new file mode 100644
index 0000000000..be5bf0c995
--- /dev/null
+++ b/deps/rabbit/src/background_gc.erl
@@ -0,0 +1,78 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(background_gc).
+
+%% Periodically garbage-collects processes that are in the `waiting'
+%% status. The run interval adapts: rabbit_misc:interval_operation/5
+%% stretches it (up to ?MAX_INTERVAL) when a GC pass costs more than
+%% ?MAX_RATIO of the target interval configured via the
+%% `background_gc_target_interval' application environment key.
+
+-behaviour(gen_server2).
+
+-export([start_link/0, run/0]).
+-export([gc/0]). %% For run_interval only
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+-define(MAX_RATIO, 0.01).
+-define(MAX_INTERVAL, 240000).
+
+%% last_interval: the interval (ms) used for the previous timer, fed
+%% back into interval_operation/5 to smooth adjustments.
+-record(state, {last_interval}).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> {'ok', pid()} | {'error', any()}.
+
+start_link() -> gen_server2:start_link({local, ?MODULE}, ?MODULE, [],
+                                       [{timeout, infinity}]).
+
+-spec run() -> 'ok'.
+
+%% Asynchronously triggers an immediate, out-of-schedule GC pass.
+run() -> gen_server2:cast(?MODULE, run).
+
+%%----------------------------------------------------------------------------
+
+init([]) ->
+    {ok, IdealInterval} = application:get_env(rabbit, background_gc_target_interval),
+    %% Run one timed pass right away; it also schedules the next one.
+    {ok, interval_gc(#state{last_interval = IdealInterval})}.
+
+%% No synchronous API exists; any call is a programming error.
+handle_call(Msg, _From, State) ->
+    {stop, {unexpected_call, Msg}, {unexpected_call, Msg}, State}.
+
+handle_cast(run, State) -> gc(), {noreply, State};
+
+handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}.
+
+%% Timer tick from erlang:send_after/3 in interval_gc/1.
+handle_info(run, State) -> {noreply, interval_gc(State)};
+
+handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}.
+
+code_change(_OldVsn, State, _Extra) -> {ok, State}.
+
+%% NOTE(review): returns State rather than the conventional `ok'; the
+%% return value of terminate/2 is ignored, so this is cosmetic only.
+terminate(_Reason, State) -> State.
+
+%%----------------------------------------------------------------------------
+
+%% Runs a timed GC pass, derives the next interval from its cost, and
+%% schedules the next `run' message.
+interval_gc(State = #state{last_interval = LastInterval}) ->
+    {ok, IdealInterval} = application:get_env(rabbit, background_gc_target_interval),
+    {ok, Interval} = rabbit_misc:interval_operation(
+                       {?MODULE, gc, []},
+                       ?MAX_RATIO, ?MAX_INTERVAL, IdealInterval, LastInterval),
+    erlang:send_after(Interval, self(), run),
+    State#state{last_interval = Interval}.
+
+-spec gc() -> 'ok'.
+
+%% Collects every process currently in `waiting' status (plus this
+%% process itself), but only when `background_gc_enabled' is set.
+gc() ->
+    Enabled = rabbit_misc:get_env(rabbit, background_gc_enabled, false),
+    case Enabled of
+        true ->
+            [garbage_collect(P) || P <- processes(),
+                                   {status, waiting} == process_info(P, status)],
+            %% since we will never be waiting...
+            garbage_collect();
+        false ->
+            ok
+    end,
+    ok.
diff --git a/deps/rabbit/src/code_server_cache.erl b/deps/rabbit/src/code_server_cache.erl
new file mode 100644
index 0000000000..b53f5dcee9
--- /dev/null
+++ b/deps/rabbit/src/code_server_cache.erl
@@ -0,0 +1,81 @@
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+%% ex: ts=4 sw=4 et
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(code_server_cache).
+
+%% Caches, per module, whether calls into it should be attempted at
+%% all. The first time a call via maybe_call_mfa/4 fails (undef or any
+%% other error) the module is marked bad, and every subsequent call
+%% for that module returns the caller-supplied default without trying
+%% again — avoiding repeated code-server lookups/log noise for modules
+%% that are not loadable.
+
+-behaviour(gen_server).
+
+%% API
+-export([start_link/0,
+         maybe_call_mfa/4]).
+
+%% gen_server callbacks
+-export([init/1,
+         handle_call/3,
+         handle_cast/2,
+         handle_info/2,
+         terminate/2,
+         code_change/3]).
+
+%% modules: Module => false once a call into Module has failed;
+%% absence means the module is still assumed callable.
+-record(state, {
+    modules = #{} :: #{atom() => boolean()}
+}).
+
+%% API
+start_link() ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+%% Calls Module:Function(Args...) unless the module was previously
+%% marked bad, in which case Default is returned instead.
+maybe_call_mfa(Module, Function, Args, Default) ->
+    gen_server:call(?MODULE, {maybe_call_mfa, {Module, Function, Args, Default}}).
+
+%% gen_server callbacks
+
+init([]) ->
+    {ok, #state{}}.
+
+handle_call({maybe_call_mfa, {Mod, _F, _A, _D} = MFA}, _From, #state{modules = ModuleMap} = State0) ->
+    %% Unknown modules default to true (assumed callable).
+    Value = maps:get(Mod, ModuleMap, true),
+    {ok, Reply, State1} = handle_maybe_call_mfa(Value, MFA, State0),
+    {reply, Reply, State1};
+handle_call(_Request, _From, State) ->
+    {reply, ignored, State}.
+
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+terminate(_Reason, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%% Internal functions
+
+%% Module already known-bad: short-circuit with the default.
+handle_maybe_call_mfa(false, {_M, _F, _A, Default}, State) ->
+    {ok, Default, State};
+%% Module assumed good: attempt the call; on undef the failure is
+%% silent, any other error is logged — either way the module is
+%% marked bad and the default returned.
+handle_maybe_call_mfa(true, {Module, Function, Args, Default}, State) ->
+    try
+        Reply = erlang:apply(Module, Function, Args),
+        {ok, Reply, State}
+    catch
+        error:undef ->
+            handle_maybe_call_mfa_error(Module, Default, State);
+        Err:Reason ->
+            rabbit_log:error("Calling ~p:~p failed: ~p:~p~n",
+                             [Module, Function, Err, Reason]),
+            handle_maybe_call_mfa_error(Module, Default, State)
+    end.
+
+%% Marks Module bad in the cache and returns the default.
+handle_maybe_call_mfa_error(Module, Default, #state{modules = ModuleMap0} = State0) ->
+    ModuleMap1 = maps:put(Module, false, ModuleMap0),
+    State1 = State0#state{modules = ModuleMap1},
+    {ok, Default, State1}.
diff --git a/deps/rabbit/src/gatherer.erl b/deps/rabbit/src/gatherer.erl
new file mode 100644
index 0000000000..2b46ec02b1
--- /dev/null
+++ b/deps/rabbit/src/gatherer.erl
@@ -0,0 +1,151 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(gatherer).
+
+%% Gatherer is a queue which has producer and consumer processes. Before producers
+%% push items to the queue using gatherer:in/2 they need to declare their intent
+%% to do so with gatherer:fork/1. When a publisher's work is done, it states so
+%% using gatherer:finish/1.
+%%
+%% Consumers pop messages off queues with gatherer:out/1. If a queue is empty
+%% and there are producers that haven't finished working, the caller is blocked
+%% until an item is available. If there are no active producers, gatherer:out/1
+%% immediately returns 'empty'.
+%%
+%% This module is primarily used to collect results from asynchronous tasks
+%% running in a worker pool, e.g. when recovering bindings or rebuilding
+%% message store indices.
+
+-behaviour(gen_server2).
+
+-export([start_link/0, stop/1, fork/1, finish/1, in/2, sync_in/2, out/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+%%----------------------------------------------------------------------------
+
+-define(HIBERNATE_AFTER_MIN, 1000).
+-define(DESIRED_HIBERNATE, 10000).
+
+%%----------------------------------------------------------------------------
+
+%% forks:   number of producers that have fork'd but not yet finish'd.
+%% values:  queue of {PendingIn, Value} pairs, where PendingIn is the
+%%          gen_server From of a blocked sync_in caller, or undefined
+%%          for async in/2.
+%% blocked: queue of gen_server From refs of consumers waiting in out/1.
+-record(gstate, { forks, values, blocked }).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+start_link() ->
+    gen_server2:start_link(?MODULE, [], [{timeout, infinity}]).
+
+-spec stop(pid()) -> 'ok'.
+
+%% Unlinks first so the caller is not taken down by the gatherer's
+%% normal exit.
+stop(Pid) ->
+    unlink(Pid),
+    gen_server2:call(Pid, stop, infinity).
+
+-spec fork(pid()) -> 'ok'.
+
+%% Declares one more active producer.
+fork(Pid) ->
+    gen_server2:call(Pid, fork, infinity).
+
+-spec finish(pid()) -> 'ok'.
+
+%% Declares a producer done; when the last one finishes, all blocked
+%% consumers are released with 'empty'.
+finish(Pid) ->
+    gen_server2:cast(Pid, finish).
+
+-spec in(pid(), any()) -> 'ok'.
+
+%% Asynchronously pushes a value; the producer does not wait for it to
+%% be consumed.
+in(Pid, Value) ->
+    gen_server2:cast(Pid, {in, Value}).
+
+-spec sync_in(pid(), any()) -> 'ok'.
+
+%% Pushes a value and blocks the producer until a consumer takes it
+%% (provides back-pressure).
+sync_in(Pid, Value) ->
+    gen_server2:call(Pid, {in, Value}, infinity).
+
+-spec out(pid()) -> {'value', any()} | 'empty'.
+
+%% Pops a value; blocks while producers are still active and nothing
+%% is queued, returns 'empty' once no producers remain.
+out(Pid) ->
+    gen_server2:call(Pid, out, infinity).
+
+%%----------------------------------------------------------------------------
+
+init([]) ->
+    {ok, #gstate { forks = 0, values = queue:new(), blocked = queue:new() },
+     hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+handle_call(stop, _From, State) ->
+    {stop, normal, ok, State};
+
+handle_call(fork, _From, State = #gstate { forks = Forks }) ->
+    {reply, ok, State #gstate { forks = Forks + 1 }, hibernate};
+
+%% sync_in: no reply here — the producer stays blocked until in/3
+%% either hands the value straight to a waiting consumer or a later
+%% 'out' dequeues it (see the PendingIn reply below).
+handle_call({in, Value}, From, State) ->
+    {noreply, in(Value, From, State), hibernate};
+
+handle_call(out, From, State = #gstate { forks = Forks,
+                                         values = Values,
+                                         blocked = Blocked }) ->
+    case queue:out(Values) of
+        {empty, _} when Forks == 0 ->
+            {reply, empty, State, hibernate};
+        {empty, _} ->
+            %% Producers still active: park this consumer.
+            {noreply, State #gstate { blocked = queue:in(From, Blocked) },
+             hibernate};
+        {{value, {PendingIn, Value}}, NewValues} ->
+            %% Release the sync_in producer (if any) now that its value
+            %% has been consumed.
+            reply(PendingIn, ok),
+            {reply, {value, Value}, State #gstate { values = NewValues },
+             hibernate}
+    end;
+
+handle_call(Msg, _From, State) ->
+    {stop, {unexpected_call, Msg}, State}.
+
+handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) ->
+    NewForks = Forks - 1,
+    NewBlocked = case NewForks of
+                     %% Last producer gone: wake all parked consumers
+                     %% with 'empty'.
+                     0 -> _ = [gen_server2:reply(From, empty) ||
+                                  From <- queue:to_list(Blocked)],
+                          queue:new();
+                     _ -> Blocked
+                 end,
+    {noreply, State #gstate { forks = NewForks, blocked = NewBlocked },
+     hibernate};
+
+%% Async in/2: no producer to unblock, so From is undefined.
+handle_cast({in, Value}, State) ->
+    {noreply, in(Value, undefined, State), hibernate};
+
+handle_cast(Msg, State) ->
+    {stop, {unexpected_cast, Msg}, State}.
+
+handle_info(Msg, State) ->
+    {stop, {unexpected_info, Msg}, State}.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%% NOTE(review): returns State rather than the conventional `ok'; the
+%% return value of terminate/2 is ignored, so this is cosmetic only.
+terminate(_Reason, State) ->
+    State.
+
+%%----------------------------------------------------------------------------
+
+%% If a consumer is waiting, hand the value over directly (acking the
+%% producer immediately); otherwise enqueue {From, Value} so a later
+%% 'out' can both deliver the value and release the producer.
+in(Value, From, State = #gstate { values = Values, blocked = Blocked }) ->
+    case queue:out(Blocked) of
+        {empty, _} ->
+            State #gstate { values = queue:in({From, Value}, Values) };
+        {{value, PendingOut}, NewBlocked} ->
+            reply(From, ok),
+            gen_server2:reply(PendingOut, {value, Value}),
+            State #gstate { blocked = NewBlocked }
+    end.
+
+%% Reply helper tolerating the async (undefined From) case.
+reply(undefined, _Reply) -> ok;
+reply(From, Reply) -> gen_server2:reply(From, Reply).
diff --git a/deps/rabbit/src/gm.erl b/deps/rabbit/src/gm.erl
new file mode 100644
index 0000000000..af24a2958a
--- /dev/null
+++ b/deps/rabbit/src/gm.erl
@@ -0,0 +1,1650 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(gm).
+
+%% Guaranteed Multicast
+%% ====================
+%%
+%% This module provides the ability to create named groups of
+%% processes to which members can be dynamically added and removed,
+%% and for messages to be broadcast within the group that are
+%% guaranteed to reach all members of the group during the lifetime of
+%% the message. The lifetime of a message is defined as being, at a
+%% minimum, the time from which the message is first sent to any
+%% member of the group, up until the time at which it is known by the
+%% member who published the message that the message has reached all
+%% group members.
+%%
+%% The guarantee given is that provided a message, once sent, makes it
+%% to members who do not all leave the group, the message will
+%% continue to propagate to all group members.
+%%
+%% Another way of stating the guarantee is that if member P publishes
+%% messages m and m', then for all members P', if P' is a member of
+%% the group prior to the publication of m, and P' receives m', then
+%% P' will receive m.
+%%
+%% Note that only local-ordering is enforced: i.e. if member P sends
+%% message m and then message m', then for-all members P', if P'
+%% receives m and m', then they will receive m' after m. Causality
+%% ordering is _not_ enforced. I.e. if member P receives message m
+%% and as a result publishes message m', there is no guarantee that
+%% other members P' will receive m before m'.
+%%
+%%
+%% API Use
+%% -------
+%%
+%% Mnesia must be started. Use the idempotent create_tables/0 function
+%% to create the tables required.
+%%
+%% start_link/3
+%% Provide the group name, the callback module name, and any arguments
+%% you wish to be passed into the callback module's functions. The
+%% joined/2 function will be called when we have joined the group,
+%% with the arguments passed to start_link and a list of the current
+%% members of the group. See the callbacks specs and the comments
+%% below for further details of the callback functions.
+%%
+%% leave/1
+%% Provide the Pid. Removes the Pid from the group. The callback
+%% handle_terminate/2 function will be called.
+%%
+%% broadcast/2
+%% Provide the Pid and a Message. The message will be sent to all
+%% members of the group as per the guarantees given above. This is a
+%% cast and the function call will return immediately. There is no
+%% guarantee that the message will reach any member of the group.
+%%
+%% confirmed_broadcast/2
+%% Provide the Pid and a Message. As per broadcast/2 except that this
+%% is a call, not a cast, and only returns 'ok' once the Message has
+%% reached every member of the group. Do not call
+%% confirmed_broadcast/2 directly from the callback module otherwise
+%% you will deadlock the entire group.
+%%
+%% info/1
+%% Provide the Pid. Returns a proplist with various facts, including
+%% the group name and the current group members.
+%%
+%% validate_members/2
+%% Check whether a given member list agrees with the chosen member's
+%% view. Any differences will be communicated via the members_changed
+%% callback. If there are no differences then there will be no reply.
+%% Note that members will not necessarily share the same view.
+%%
+%% forget_group/1
+%% Provide the group name. Removes its mnesia record. Makes no attempt
+%% to ensure the group is empty.
+%%
+%% Implementation Overview
+%% -----------------------
+%%
+%% One possible means of implementation would be a fan-out from the
+%% sender to every member of the group. This would require that the
+%% group is fully connected, and, in the event that the original
+%% sender of the message disappears from the group before the message
+%% has made it to every member of the group, raises questions as to
+%% who is responsible for sending on the message to new group members.
+%% In particular, the issue is with [ Pid ! Msg || Pid <- Members ] -
+%% if the sender dies part way through, who is responsible for
+%% ensuring that the remaining Members receive the Msg? In the event
+%% that within the group, messages sent are broadcast from a subset of
+%% the members, the fan-out arrangement has the potential to
+%% substantially impact the CPU and network workload of such members,
+%% as such members would have to accommodate the cost of sending each
+%% message to every group member.
+%%
+%% Instead, if the members of the group are arranged in a chain, then
+%% it becomes easier to reason about who within the group has received
+%% each message and who has not. It eases issues of responsibility: in
+%% the event of a group member disappearing, the nearest upstream
+%% member of the chain is responsible for ensuring that messages
+%% continue to propagate down the chain. It also results in equal
+%% distribution of sending and receiving workload, even if all
+%% messages are being sent from just a single group member. This
+%% configuration has the further advantage that it is not necessary
+%% for every group member to know of every other group member, and
+%% even that a group member does not have to be accessible from all
+%% other group members.
+%%
+%% Performance is kept high by permitting pipelining and all
+%% communication between joined group members is asynchronous. In the
+%% chain A -> B -> C -> D, if A sends a message to the group, it will
+%% not directly contact C or D. However, it must know that D receives
+%% the message (in addition to B and C) before it can consider the
+%% message fully sent. A simplistic implementation would require that
+%% D replies to C, C replies to B and B then replies to A. This would
+%% result in a propagation delay of twice the length of the chain. It
+%% would also require, in the event of the failure of C, that D knows
+%% to directly contact B and issue the necessary replies. Instead, the
+%% chain forms a ring: D sends the message on to A: D does not
+%% distinguish A as the sender, merely as the next member (downstream)
+%% within the chain (which has now become a ring). When A receives
+%% from D messages that A sent, it knows that all members have
+%% received the message. However, the message is not dead yet: if C
+%% died as B was sending to C, then B would need to detect the death
+%% of C and forward the message on to D instead: thus every node has
+%% to remember every message published until it is told that it can
+%% forget about the message. This is essential not just for dealing
+%% with failure of members, but also for the addition of new members.
+%%
+%% Thus once A receives the message back again, it then sends to B an
+%% acknowledgement for the message, indicating that B can now forget
+%% about the message. B does so, and forwards the ack to C. C forgets
+%% the message, and forwards the ack to D, which forgets the message
+%% and finally forwards the ack back to A. At this point, A takes no
+%% further action: the message and its acknowledgement have made it to
+%% every member of the group. The message is now dead, and any new
+%% member joining the group at this point will not receive the
+%% message.
+%%
+%% We therefore have two roles:
+%%
+%% 1. The sender, who upon receiving their own messages back, must
+%% then send out acknowledgements, and upon receiving their own
+%% acknowledgements back perform no further action.
+%%
+%% 2. The other group members who upon receiving messages and
+%% acknowledgements must update their own internal state accordingly
+%% (the sending member must also do this in order to be able to
+%% accommodate failures), and forwards messages on to their downstream
+%% neighbours.
+%%
+%%
+%% Implementation: It gets trickier
+%% --------------------------------
+%%
+%% Chain A -> B -> C -> D
+%%
+%% A publishes a message which B receives. A now dies. B and D will
+%% detect the death of A, and will link up, thus the chain is now B ->
+%% C -> D. B forwards A's message on to C, who forwards it to D, who
+%% forwards it to B. Thus B is now responsible for A's messages - both
+%% publications and acknowledgements that were in flight at the point
+%% at which A died. Even worse is that this is transitive: after B
+%% forwards A's message to C, B dies as well. Now C is not only
+%% responsible for B's in-flight messages, but is also responsible for
+%% A's in-flight messages.
+%%
+%% Lemma 1: A member can only determine which dead members they have
+%% inherited responsibility for if there is a total ordering on the
+%% conflicting additions and subtractions of members from the group.
+%%
+%% Consider the simultaneous death of B and addition of B' that
+%% transitions a chain from A -> B -> C to A -> B' -> C. Either B' or
+%% C is responsible for in-flight messages from B. It is easy to
+%% ensure that at least one of them thinks they have inherited B, but
+%% if we do not ensure that exactly one of them inherits B, then we
+%% could have B' converting publishes to acks, which then will crash C
+%% as C does not believe it has issued acks for those messages.
+%%
+%% More complex scenarios are easy to concoct: A -> B -> C -> D -> E
+%% becoming A -> C' -> E. Who has inherited which of B, C and D?
+%%
+%% However, for non-conflicting membership changes, only a partial
+%% ordering is required. For example, A -> B -> C becoming A -> A' ->
+%% B. The addition of A', between A and B can have no conflicts with
+%% the death of C: it is clear that A has inherited C's messages.
+%%
+%% For ease of implementation, we adopt the simple solution, of
+%% imposing a total order on all membership changes.
+%%
+%% On the death of a member, it is ensured the dead member's
+%% neighbours become aware of the death, and the upstream neighbour
+%% now sends to its new downstream neighbour its state, including the
+%% messages pending acknowledgement. The downstream neighbour can then
+%% use this to calculate which publishes and acknowledgements it has
+%% missed out on, due to the death of its old upstream. Thus the
+%% downstream can catch up, and continues the propagation of messages
+%% through the group.
+%%
+%% Lemma 2: When a member is joining, it must synchronously
+%% communicate with its upstream member in order to receive its
+%% starting state atomically with its addition to the group.
+%%
+%% New members must start with the same state as their nearest
+%% upstream neighbour. This ensures that it is not surprised by
+%% acknowledgements they are sent, and that should their downstream
+%% neighbour die, they are able to send the correct state to their new
+%% downstream neighbour to ensure it can catch up. Thus in the
+%% transition A -> B -> C becomes A -> A' -> B -> C becomes A -> A' ->
+%% C, A' must start with the state of A, so that it can send C the
+%% correct state when B dies, allowing C to detect any missed
+%% messages.
+%%
+%% If A' starts by adding itself to the group membership, A could then
+%% die, without A' having received the necessary state from A. This
+%% would leave A' responsible for in-flight messages from A, but
+%% having the least knowledge of all, of those messages. Thus A' must
+%% start by synchronously calling A, which then immediately sends A'
+%% back its state. A then adds A' to the group. If A dies at this
+%% point then A' will be able to see this (as A' will fail to appear
+%% in the group membership), and thus A' will ignore the state it
+%% receives from A, and will simply repeat the process, trying to now
+%% join downstream from some other member. This ensures that should
+%% the upstream die as soon as the new member has been joined, the new
+%% member is guaranteed to receive the correct state, allowing it to
+%% correctly process messages inherited due to the death of its
+%% upstream neighbour.
+%%
+%% The canonical definition of the group membership is held by a
+%% distributed database. Whilst this allows the total ordering of
+%% changes to be achieved, it is nevertheless undesirable to have to
+%% query this database for the current view, upon receiving each
+%% message. Instead, we wish for members to be able to cache a view of
+%% the group membership, which then requires a cache invalidation
+%% mechanism. Each member maintains its own view of the group
+%% membership. Thus when the group's membership changes, members may
+%% need to become aware of such changes in order to be able to
+%% accurately process messages they receive. Because of the
+%% requirement of a total ordering of conflicting membership changes,
+%% it is not possible to use the guaranteed broadcast mechanism to
+%% communicate these changes: to achieve the necessary ordering, it
+%% would be necessary for such messages to be published by exactly one
+%% member, which can not be guaranteed given that such a member could
+%% die.
+%%
+%% The total ordering we enforce on membership changes gives rise to a
+%% view version number: every change to the membership creates a
+%% different view, and the total ordering permits a simple
+%% monotonically increasing view version number.
+%%
+%% Lemma 3: If a message is sent from a member that holds view version
+%% N, it can be correctly processed by any member receiving the
+%% message with a view version >= N.
+%%
+%% Initially, let us suppose that each view contains the ordering of
+%% every member that was ever part of the group. Dead members are
+%% marked as such. Thus we have a ring of members, some of which are
+%% dead, and are thus inherited by the nearest alive downstream
+%% member.
+%%
+%% In the chain A -> B -> C, all three members initially have view
+%% version 1, which reflects reality. B publishes a message, which is
+%% forward by C to A. B now dies, which A notices very quickly. Thus A
+%% updates the view, creating version 2. It now forwards B's
+%% publication, sending that message to its new downstream neighbour,
+%% C. This happens before C is aware of the death of B. C must become
+%% aware of the view change before it interprets the message it
+%% received, otherwise it will fail to learn of the death of B, and
+%% thus will not realise it has inherited B's messages (and will
+%% likely crash).
+%%
+%% Thus very simply, we have that each subsequent view contains more
+%% information than the preceding view.
+%%
+%% However, to avoid the views growing indefinitely, we need to be
+%% able to delete members which have died _and_ for which no messages
+%% are in-flight. This requires that upon inheriting a dead member, we
+%% know the last publication sent by the dead member (this is easy: we
+%% inherit a member because we are the nearest downstream member which
+%% implies that we know at least as much as everyone else about the
+%% publications of the dead member), and we know the earliest message
+%% for which the acknowledgement is still in flight.
+%%
+%% In the chain A -> B -> C, when B dies, A will send to C its state
+%% (as C is the new downstream from A), allowing C to calculate which
+%% messages it has missed out on (described above). At this point, C
+%% also inherits B's messages. If that state from A also includes the
+%% last message published by B for which an acknowledgement has been
+%% seen, then C knows exactly which further acknowledgements it must
+%% receive (also including issuing acknowledgements for publications
+%% still in-flight that it receives), after which it is known there
+%% are no more messages in flight for B, thus all evidence that B was
+%% ever part of the group can be safely removed from the canonical
+%% group membership.
+%%
+%% Thus, for every message that a member sends, it includes with that
+%% message its view version. When a member receives a message it will
+%% update its view from the canonical copy, should its view be older
+%% than the view version included in the message it has received.
+%%
+%% The state held by each member therefore includes the messages from
+%% each publisher pending acknowledgement, the last publication seen
+%% from that publisher, and the last acknowledgement from that
+%% publisher. In the case of the member's own publications or
+%% inherited members, this last acknowledgement seen state indicates
+%% the last acknowledgement retired, rather than sent.
+%%
+%%
+%% Proof sketch
+%% ------------
+%%
+%% We need to prove that with the provided operational semantics, we
+%% can never reach a state that is not well formed from a well-formed
+%% starting state.
+%%
+%% Operational semantics (small step): straight-forward message
+%% sending, process monitoring, state updates.
+%%
+%% Well formed state: dead members inherited by exactly one non-dead
+%% member; for every entry in anyone's pending-acks, either (the
+%% publication of the message is in-flight downstream from the member
+%% and upstream from the publisher) or (the acknowledgement of the
+%% message is in-flight downstream from the publisher and upstream
+%% from the member).
+%%
+%% Proof by induction on the applicable operational semantics.
+%%
+%%
+%% Related work
+%% ------------
+%%
+%% The ring configuration and double traversal of messages around the
+%% ring is similar (though developed independently) to the LCR
+%% protocol by [Levy 2008]. However, LCR differs in several
+%% ways. Firstly, by using vector clocks, it enforces a total order of
+%% message delivery, which is unnecessary for our purposes. More
+%% significantly, it is built on top of a "group communication system"
+%% which performs the group management functions, taking
+%% responsibility away from the protocol as to how to cope with safely
+%% adding and removing members. When membership changes do occur, the
+%% protocol stipulates that every member must perform communication
+%% with every other member of the group, to ensure all outstanding
+%% deliveries complete, before the entire group transitions to the new
+%% view. This, in total, requires two sets of all-to-all synchronous
+%% communications.
+%%
+%% This is not only rather inefficient, but also does not explain what
+%% happens upon the failure of a member during this process. It does
+%% though entirely avoid the need for inheritance of responsibility of
+%% dead members that our protocol incorporates.
+%%
+%% In [Marandi et al 2010], a Paxos-based protocol is described. This
+%% work explicitly focuses on the efficiency of communication. LCR
+%% (and our protocol too) are more efficient, but at the cost of
+%% higher latency. The Ring-Paxos protocol is itself built on top of
+%% IP-multicast, which rules it out for many applications where
+%% point-to-point communication is all that can be required. They also
+%% have an excellent related work section which I really ought to
+%% read...
+%%
+%%
+%% [Levy 2008] The Complexity of Reliable Distributed Storage, 2008.
+%% [Marandi et al 2010] Ring Paxos: A High-Throughput Atomic Broadcast
+%% Protocol
+
+
+-behaviour(gen_server2).
+
+-export([create_tables/0, start_link/4, leave/1, broadcast/2, broadcast/3,
+ confirmed_broadcast/2, info/1, validate_members/2, forget_group/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3, prioritise_info/3]).
+
+%% For INSTR_MOD callbacks
+-export([call/3, cast/2, monitor/1, demonitor/1]).
+
+-export([table_definitions/0]).
+
+-define(GROUP_TABLE, gm_group).
+-define(MAX_BUFFER_SIZE, 100000000). %% 100MB
+-define(BROADCAST_TIMER, 25).
+-define(FORCE_GC_TIMER, 250).
+-define(VERSION_START, 0).
+-define(SETS, ordsets).
+
+-record(state,
+ { self,
+ left,
+ right,
+ group_name,
+ module,
+ view,
+ pub_count,
+ members_state,
+ callback_args,
+ confirms,
+ broadcast_buffer,
+ broadcast_buffer_sz,
+ broadcast_timer,
+ force_gc_timer,
+ txn_executor,
+ shutting_down
+ }).
+
+-record(gm_group, { name, version, members }).
+
+-record(view_member, { id, aliases, left, right }).
+
+-record(member, { pending_ack, last_pub, last_ack }).
+
+-define(TABLE, {?GROUP_TABLE, [{record_name, gm_group},
+ {attributes, record_info(fields, gm_group)}]}).
+-define(TABLE_MATCH, {match, #gm_group { _ = '_' }}).
+
+-define(TAG, '$gm').
+
+-export_type([group_name/0]).
+
+-type group_name() :: any().
+-type txn_fun() :: fun((fun(() -> any())) -> any()).
+
+%% The joined, members_changed and handle_msg callbacks can all return
+%% any of the following terms:
+%%
+%% 'ok' - the callback function returns normally
+%%
+%% {'stop', Reason} - the callback indicates the member should stop
+%% with reason Reason and should leave the group.
+%%
+%% {'become', Module, Args} - the callback indicates that the callback
+%% module should be changed to Module and that the callback functions
+%% should now be passed the arguments Args. This allows the callback
+%% module to be dynamically changed.
+
+%% Called when we've successfully joined the group. Supplied with Args
+%% provided in start_link, plus current group members.
+-callback joined(Args :: term(), Members :: [pid()]) ->
+ ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}.
+
+%% Supplied with Args provided in start_link, the list of new members
+%% and the list of members previously known to us that have since
+%% died. Note that if a member joins and dies very quickly, it's
+%% possible that we will never see that member appear in either births
+%% or deaths. However we are guaranteed that (1) we will see a member
+%% joining either in the births here, or in the members passed to
+%% joined/2 before receiving any messages from it; and (2) we will not
+%% see members die that we have not seen born (or supplied in the
+%% members to joined/2).
+-callback members_changed(Args :: term(),
+ Births :: [pid()], Deaths :: [pid()]) ->
+ ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}.
+
+%% Supplied with Args provided in start_link, the sender, and the
+%% message. This does get called for messages injected by this member,
+%% however, in such cases, there is no special significance of this
+%% invocation: it does not indicate that the message has made it to
+%% any other members, let alone all other members.
+-callback handle_msg(Args :: term(), From :: pid(), Message :: term()) ->
+ ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}.
+
+%% Called on gm member termination as per rules in gen_server, with
+%% the Args provided in start_link plus the termination Reason.
+-callback handle_terminate(Args :: term(), Reason :: term()) ->
+ ok | term().
+
+-spec create_tables() -> 'ok' | {'aborted', any()}.
+
+%% Idempotently create the mnesia table(s) used for group membership.
+create_tables() ->
+ create_tables([?TABLE]).
+
+create_tables([]) ->
+ ok;
+create_tables([{Table, Attributes} | Tables]) ->
+ case mnesia:create_table(Table, Attributes) of
+ {atomic, ok} -> create_tables(Tables);
+ %% Tolerate re-creation so create_tables/0 stays idempotent.
+ {aborted, {already_exists, Table}} -> create_tables(Tables);
+ Err -> Err
+ end.
+
+%% Expose the table definition (with its match spec) for external tooling.
+table_definitions() ->
+ {Name, Attributes} = ?TABLE,
+ [{Name, [?TABLE_MATCH | Attributes]}].
+
+-spec start_link(group_name(), atom(), any(), txn_fun()) ->
+ rabbit_types:ok_pid_or_error().
+
+%% Start a gm member for GroupName with callback module Module.
+%% fullsweep_after 0 keeps the heap small; this process can otherwise
+%% accumulate a lot of garbage from buffered broadcast activity.
+start_link(GroupName, Module, Args, TxnFun) ->
+ gen_server2:start_link(?MODULE, [GroupName, Module, Args, TxnFun],
+ [{spawn_opt, [{fullsweep_after, 0}]}]).
+
+-spec leave(pid()) -> 'ok'.
+
+%% Asynchronously remove the member from its group; handle_terminate/2
+%% will be invoked on the callback module.
+leave(Server) ->
+ gen_server2:cast(Server, leave).
+
+-spec broadcast(pid(), any()) -> 'ok'.
+
+%% Fire-and-forget broadcast; no delivery guarantee to any member.
+broadcast(Server, Msg) -> broadcast(Server, Msg, 0).
+
+%% SizeHint lets the member account for buffer growth without measuring
+%% the term itself.
+broadcast(Server, Msg, SizeHint) ->
+ gen_server2:cast(Server, {broadcast, Msg, SizeHint}).
+
+-spec confirmed_broadcast(pid(), any()) -> 'ok'.
+
+%% Synchronous broadcast: returns only once the message has reached every
+%% group member. Must NOT be called from within a gm callback (deadlock).
+confirmed_broadcast(Server, Msg) ->
+ gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity).
+
+-spec info(pid()) -> rabbit_types:infos().
+
+%% Returns a proplist with group_name, module and current group_members.
+info(Server) ->
+ gen_server2:call(Server, info, infinity).
+
+-spec validate_members(pid(), [pid()]) -> 'ok'.
+
+%% Ask the member to diff Members against its own view; differences are
+%% reported via the members_changed callback (no reply if they agree).
+validate_members(Server, Members) ->
+ gen_server2:cast(Server, {validate_members, Members}).
+
+-spec forget_group(group_name()) -> 'ok'.
+
+%% Delete the group's mnesia record. Makes no attempt to check that the
+%% group is empty; callers are responsible for that.
+forget_group(GroupName) ->
+ {atomic, ok} = mnesia:sync_transaction(
+ fun () ->
+ mnesia:delete({?GROUP_TABLE, GroupName})
+ end),
+ ok.
+
+%% init/1 stays fast: the actual group join is deferred to an async
+%% 'join' cast to self. Until then members_state/view are undefined and
+%% left/right both point at ourselves with no monitor.
+init([GroupName, Module, Args, TxnFun]) ->
+ put(process_name, {?MODULE, GroupName}),
+ Self = make_member(GroupName),
+ gen_server2:cast(self(), join),
+ {ok, #state { self = Self,
+ left = {Self, undefined},
+ right = {Self, undefined},
+ group_name = GroupName,
+ module = Module,
+ view = undefined,
+ pub_count = -1,
+ members_state = undefined,
+ callback_args = Args,
+ confirms = queue:new(),
+ broadcast_buffer = [],
+ broadcast_buffer_sz = 0,
+ broadcast_timer = undefined,
+ force_gc_timer = undefined,
+ txn_executor = TxnFun,
+ shutting_down = false }}.
+
+
+%% Reject confirmed broadcasts while shutting down or before the join
+%% has completed (members_state is still undefined).
+handle_call({confirmed_broadcast, _Msg}, _From,
+ State = #state { shutting_down = {true, _} }) ->
+ reply(shutting_down, State);
+
+handle_call({confirmed_broadcast, _Msg}, _From,
+ State = #state { members_state = undefined }) ->
+ reply(not_joined, State);
+
+%% Sole member of the group (right neighbour is ourselves, unmonitored):
+%% deliver the message locally via the callback and confirm immediately.
+handle_call({confirmed_broadcast, Msg}, _From,
+ State = #state { self = Self,
+ right = {Self, undefined},
+ module = Module,
+ callback_args = Args }) ->
+ handle_callback_result({Module:handle_msg(Args, get_pid(Self), Msg),
+ ok, State});
+
+%% Normal case: publish, remember From keyed by this publication's
+%% pub_count so the caller is replied to once the message has gone all
+%% the way round the ring, and flush so it starts travelling now.
+handle_call({confirmed_broadcast, Msg}, From, State) ->
+ {Result, State1 = #state { pub_count = PubCount, confirms = Confirms }} =
+ internal_broadcast(Msg, 0, State),
+ Confirms1 = queue:in({PubCount, From}, Confirms),
+ handle_callback_result({Result, flush_broadcast_buffer(
+ State1 #state { confirms = Confirms1 })});
+
+handle_call(info, _From,
+ State = #state { members_state = undefined }) ->
+ reply(not_joined, State);
+
+handle_call(info, _From, State = #state { group_name = GroupName,
+ module = Module,
+ view = View }) ->
+ reply([{group_name, GroupName},
+ {module, Module},
+ {group_members, get_pids(alive_view_members(View))}], State);
+
+%% A joiner asked to be added downstream of us before our own catchup
+%% completed; tell it to try elsewhere.
+handle_call({add_on_right, _NewMember}, _From,
+ State = #state { members_state = undefined }) ->
+ reply(not_ready, State);
+
+%% Record the joiner in the canonical group, then synchronously send it
+%% our state (catchup) before adopting the new view ourselves — see
+%% Lemma 2 in the module commentary.
+handle_call({add_on_right, NewMember}, _From,
+ State = #state { self = Self,
+ group_name = GroupName,
+ members_state = MembersState,
+ txn_executor = TxnFun }) ->
+ try
+ Group = record_new_member_in_group(
+ NewMember, Self, GroupName, TxnFun),
+ View1 = group_to_view(check_membership(Self, Group)),
+ MembersState1 = remove_erased_members(MembersState, View1),
+ ok = send_right(NewMember, View1,
+ {catchup, Self, prepare_members_state(MembersState1)}),
+ {Result, State1} = change_view(View1, State #state {
+ members_state = MembersState1 }),
+ handle_callback_result({Result, {ok, Group}, State1})
+ catch
+ lost_membership ->
+ {stop, shutdown, State}
+ end.
+
+%% add_on_right causes a catchup to be sent immediately from the left,
+%% so we can never see this from the left neighbour. However, it's
+%% possible for the right neighbour to send us a check_neighbours
+%% immediately before that. We can't possibly handle it, but if we're
+%% in this state we know a catchup is coming imminently anyway. So
+%% just ignore it.
+handle_cast({?TAG, _ReqVer, check_neighbours},
+ State = #state { members_state = undefined }) ->
+ noreply(State);
+
+%% Tagged ring traffic: refresh our cached view from the canonical group
+%% first if the sender's view version is newer than ours (Lemma 3), then
+%% dispatch the message to handle_msg via the callback-result plumbing.
+handle_cast({?TAG, ReqVer, Msg},
+ State = #state { view = View,
+ self = Self,
+ members_state = MembersState,
+ group_name = GroupName }) ->
+ try
+ {Result, State1} =
+ case needs_view_update(ReqVer, View) of
+ true ->
+ View1 = group_to_view(
+ check_membership(Self,
+ dirty_read_group(GroupName))),
+ MemberState1 = remove_erased_members(MembersState, View1),
+ change_view(View1, State #state {
+ members_state = MemberState1 });
+ false -> {ok, State}
+ end,
+ handle_callback_result(
+ if_callback_success(
+ Result, fun handle_msg_true/3, fun handle_msg_false/3, Msg, State1))
+ catch
+ lost_membership ->
+ {stop, shutdown, State}
+ end;
+
+%% Broadcasts are silently dropped when shutting down or not yet joined;
+%% broadcast/2 makes no delivery guarantee.
+handle_cast({broadcast, _Msg, _SizeHint},
+ State = #state { shutting_down = {true, _} }) ->
+ noreply(State);
+
+handle_cast({broadcast, _Msg, _SizeHint},
+ State = #state { members_state = undefined }) ->
+ noreply(State);
+
+%% Sole member: short-circuit the ring and deliver locally.
+handle_cast({broadcast, Msg, _SizeHint},
+ State = #state { self = Self,
+ right = {Self, undefined},
+ module = Module,
+ callback_args = Args }) ->
+ handle_callback_result({Module:handle_msg(Args, get_pid(Self), Msg),
+ State});
+
+%% Normal broadcast: buffer it, flushing only if the buffer has grown
+%% large enough (maybe_flush_broadcast_buffer).
+handle_cast({broadcast, Msg, SizeHint}, State) ->
+ {Result, State1} = internal_broadcast(Msg, SizeHint, State),
+ handle_callback_result({Result, maybe_flush_broadcast_buffer(State1)});
+
+%% Deferred join from init/1: join the canonical group, start with blank
+%% member state only if we are the sole live member (otherwise our state
+%% arrives via catchup from the left), then hook up neighbours and fire
+%% the joined/2 callback.
+handle_cast(join, State = #state { self = Self,
+ group_name = GroupName,
+ members_state = undefined,
+ module = Module,
+ callback_args = Args,
+ txn_executor = TxnFun }) ->
+ try
+ View = join_group(Self, GroupName, TxnFun),
+ MembersState =
+ case alive_view_members(View) of
+ [Self] -> blank_member_state();
+ _ -> undefined
+ end,
+ State1 = check_neighbours(State #state { view = View,
+ members_state = MembersState }),
+ handle_callback_result(
+ {Module:joined(Args, get_pids(all_known_members(View))), State1})
+ catch
+ lost_membership ->
+ {stop, shutdown, State}
+ end;
+
+%% Compare the caller's member list with our view; only invoke the
+%% members_changed callback when there is an actual difference.
+handle_cast({validate_members, OldMembers},
+ State = #state { view = View,
+ module = Module,
+ callback_args = Args }) ->
+ NewMembers = get_pids(all_known_members(View)),
+ Births = NewMembers -- OldMembers,
+ Deaths = OldMembers -- NewMembers,
+ case {Births, Deaths} of
+ {[], []} -> noreply(State);
+ _ -> Result = Module:members_changed(Args, Births, Deaths),
+ handle_callback_result({Result, State})
+ end;
+
+handle_cast(leave, State) ->
+ {stop, normal, State}.
+
+
+%% Timer-driven full GC of this process (see ?FORCE_GC_TIMER).
+handle_info(force_gc, State) ->
+ garbage_collect(),
+ noreply(State #state { force_gc_timer = undefined });
+
+%% Timer-driven flush of the broadcast buffer (see ?BROADCAST_TIMER).
+handle_info(flush, State) ->
+ noreply(
+ flush_broadcast_buffer(State #state { broadcast_timer = undefined }));
+
+handle_info(timeout, State) ->
+ noreply(flush_broadcast_buffer(State));
+
+%% Neighbour DOWNs are expected and ignorable during a ring shutdown.
+handle_info({'DOWN', _MRef, process, _Pid, _Reason},
+ State = #state { shutting_down =
+ {true, {shutdown, ring_shutdown}} }) ->
+ noreply(State);
+%% A monitored process died: if it is our left or right neighbour (the
+%% MRef matches), record the death in the canonical group and move to
+%% the resulting new view; otherwise ignore.
+handle_info({'DOWN', MRef, process, _Pid, Reason},
+ State = #state { self = Self,
+ left = Left,
+ right = Right,
+ group_name = GroupName,
+ confirms = Confirms,
+ txn_executor = TxnFun }) ->
+ try
+ check_membership(GroupName),
+ Member = case {Left, Right} of
+ {{Member1, MRef}, _} -> Member1;
+ {_, {Member1, MRef}} -> Member1;
+ _ -> undefined
+ end,
+ case {Member, Reason} of
+ {undefined, _} ->
+ noreply(State);
+ {_, {shutdown, ring_shutdown}} ->
+ noreply(State);
+ _ ->
+ %% In the event of a partial partition we could see another member
+ %% go down and then remove them from Mnesia. While they can
+ %% recover from this they'd have to restart the queue - not
+ %% ideal. So let's sleep here briefly just in case this was caused
+ %% by a partial partition; in which case by the time we record the
+ %% member death in Mnesia we will probably be in a full
+ %% partition and will not be assassinating another member.
+ timer:sleep(100),
+ View1 = group_to_view(record_dead_member_in_group(Self,
+ Member, GroupName, TxnFun, true)),
+ handle_callback_result(
+ %% If we are now the only live member, start afresh with
+ %% blank state and drop pending confirms; otherwise adopt
+ %% the new view normally.
+ case alive_view_members(View1) of
+ [Self] -> maybe_erase_aliases(
+ State #state {
+ members_state = blank_member_state(),
+ confirms = purge_confirms(Confirms) },
+ View1);
+ _ -> change_view(View1, State)
+ end)
+ end
+ catch
+ lost_membership ->
+ {stop, shutdown, State}
+ end;
+handle_info(_, State) ->
+ %% Discard any unexpected messages, such as late replies from neighbour_call/2
+ %% TODO: For #gm_group{} related info messages, it could be worthwhile to
+ %% change_view/2, as this might reflect an alteration in the gm group, meaning
+ %% we now need to update our state. see rabbitmq-server#914.
+ noreply(State).
+
+%% Delegate termination handling to the callback module.
+terminate(Reason, #state { module = Module, callback_args = Args }) ->
+ Module:handle_terminate(Args, Reason).
+
+%% No state migration needed across code upgrades.
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%% gen_server2 message priorities: flush requests and most 'DOWN's jump
+%% the mailbox queue; see the per-clause notes for the two exceptions.
+prioritise_info(flush, _Len, _State) ->
+    1;
+%% DOWN messages should not overtake initial catchups; if they do we
+%% will receive a DOWN we do not know what to do with.
+prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _Len,
+                #state { members_state = undefined }) ->
+    0;
+%% We should not prioritise DOWN messages from our left since
+%% otherwise the DOWN can overtake any last activity from the left,
+%% causing that activity to be lost.
+prioritise_info({'DOWN', _MRef, process, LeftPid, _Reason}, _Len,
+                #state { left = {{_LeftVer, LeftPid}, _MRef2} }) ->
+    0;
+%% But prioritise all other DOWNs - we want to make sure we are not
+%% sending activity into the void for too long because our right is
+%% down but we don't know it.
+prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _Len, _State) ->
+    1;
+prioritise_info(_, _Len, _State) ->
+    0.
+
+
+handle_msg(check_neighbours, State) ->
+    %% no-op - it's already been done by the calling handle_cast
+    {ok, State};
+
+%% First catchup from our left while we have no members_state yet:
+%% adopt the left's state wholesale and forward the catchup to our
+%% right so it can propagate around the ring.
+handle_msg({catchup, Left, MembersStateLeft},
+           State = #state { self          = Self,
+                            left          = {Left, _MRefL},
+                            right         = {Right, _MRefR},
+                            view          = View,
+                            members_state = undefined }) ->
+    ok = send_right(Right, View, {catchup, Self, MembersStateLeft}),
+    MembersStateLeft1 = build_members_state(MembersStateLeft),
+    {ok, State #state { members_state = MembersStateLeft1 }};
+
+%% Catchup when we already have state: diff each member's pending-ack
+%% queue against the left's copy and replay the difference as
+%% synthetic activity from the left.
+handle_msg({catchup, Left, MembersStateLeft},
+           State = #state { self          = Self,
+                            left          = {Left, _MRefL},
+                            view          = View,
+                            members_state = MembersState })
+  when MembersState =/= undefined ->
+    MembersStateLeft1 = build_members_state(MembersStateLeft),
+    AllMembers = lists:usort(maps:keys(MembersState) ++
+                                 maps:keys(MembersStateLeft1)),
+    {MembersState1, Activity} =
+        lists:foldl(
+          fun (Id, MembersStateActivity) ->
+                  #member { pending_ack = PALeft, last_ack = LA } =
+                      find_member_or_blank(Id, MembersStateLeft1),
+                  with_member_acc(
+                    fun (#member { pending_ack = PA } = Member, Activity1) ->
+                            case is_member_alias(Id, Self, View) of
+                                true ->
+                                    %% For members we alias, only pubs
+                                    %% beyond the common run matter.
+                                    {_AcksInFlight, Pubs, _PA1} =
+                                        find_prefix_common_suffix(PALeft, PA),
+                                    {Member #member { last_ack = LA },
+                                     activity_cons(Id, pubs_from_queue(Pubs),
+                                                   [], Activity1)};
+                                false ->
+                                    {Acks, _Common, Pubs} =
+                                        find_prefix_common_suffix(PA, PALeft),
+                                    {Member,
+                                     activity_cons(Id, pubs_from_queue(Pubs),
+                                                   acks_from_queue(Acks),
+                                                   Activity1)}
+                            end
+                    end, Id, MembersStateActivity)
+          end, {MembersState, activity_nil()}, AllMembers),
+    handle_msg({activity, Left, activity_finalise(Activity)},
+               State #state { members_state = MembersState1 });
+
+%% Catchup from a process that is not our current left: stale, ignore.
+handle_msg({catchup, _NotLeft, _MembersState}, State) ->
+    {ok, State};
+
+%% Activity from our left: fold it into our members state, forward it
+%% rightwards, and run the user callbacks on the resulting pubs.
+handle_msg({activity, Left, Activity},
+           State = #state { self          = Self,
+                            group_name    = GroupName,
+                            left          = {Left, _MRefL},
+                            view          = View,
+                            members_state = MembersState,
+                            confirms      = Confirms })
+  when MembersState =/= undefined ->
+    try
+        %% If we have to stop, do it asap so we avoid any ack confirmation
+        %% Membership must be checked again by erase_members_in_group, as the
+        %% node can be marked as dead on the meanwhile
+        check_membership(GroupName),
+        {MembersState1, {Confirms1, Activity1}} =
+            calculate_activity(MembersState, Confirms, Activity, Self, View),
+        State1 = State #state { members_state = MembersState1,
+                                confirms      = Confirms1 },
+        Activity3 = activity_finalise(Activity1),
+        ok = maybe_send_activity(Activity3, State1),
+        {Result, State2} = maybe_erase_aliases(State1, View),
+        if_callback_success(
+          Result, fun activity_true/3, fun activity_false/3, Activity3, State2)
+    catch
+        lost_membership ->
+            {{stop, shutdown}, State}
+    end;
+
+%% Activity from a process that is not our current left: stale, ignore.
+handle_msg({activity, _NotLeft, _Activity}, State) ->
+    {ok, State}.
+
+
+%% gen_server2 {noreply, ...} with both timers re-armed. The trailing
+%% timeout is 0 whenever the broadcast buffer is non-empty, so the
+%% server wakes immediately (NOTE(review): presumably to flush the
+%% buffer promptly — the timeout handler is earlier in this module).
+noreply(State) ->
+    {noreply, ensure_timers(State), flush_timeout(State)}.
+
+%% As noreply/1 but carrying a reply for the caller.
+reply(Reply, State) ->
+    {reply, Reply, ensure_timers(State), flush_timeout(State)}.
+
+%% Arm the force-GC and broadcast-flush timers as required.
+ensure_timers(State) ->
+    ensure_force_gc_timer(ensure_broadcast_timer(State)).
+
+%% 0 => wake immediately while pubs are buffered; infinity => idle.
+flush_timeout(#state{broadcast_buffer = []}) -> infinity;
+flush_timeout(_)                             -> 0.
+
+%% Arm the periodic force-GC timer if it is not already running.
+ensure_force_gc_timer(State = #state { force_gc_timer = TRef })
+  when is_reference(TRef) ->
+    State;
+ensure_force_gc_timer(State = #state { force_gc_timer = undefined }) ->
+    TRef = erlang:send_after(?FORCE_GC_TIMER, self(), force_gc),
+    State #state { force_gc_timer = TRef }.
+
+%% Keep the broadcast-flush timer armed exactly while the buffer is
+%% non-empty: cancel it once the buffer drains, start it on first use.
+ensure_broadcast_timer(State = #state { broadcast_buffer = [],
+                                        broadcast_timer  = undefined }) ->
+    State;
+ensure_broadcast_timer(State = #state { broadcast_buffer = [],
+                                        broadcast_timer  = TRef }) ->
+    _ = erlang:cancel_timer(TRef),
+    State #state { broadcast_timer = undefined };
+ensure_broadcast_timer(State = #state { broadcast_timer = undefined }) ->
+    TRef = erlang:send_after(?BROADCAST_TIMER, self(), flush),
+    State #state { broadcast_timer = TRef };
+ensure_broadcast_timer(State) ->
+    State.
+
+%% Append Msg to the local broadcast buffer, tagged with the next pub
+%% number, and deliver it to the local callback module immediately.
+%% SizeHint feeds the size accounting used by
+%% maybe_flush_broadcast_buffer/1. Returns {CallbackResult, State1}.
+internal_broadcast(Msg, SizeHint,
+                   State = #state { self                = Self,
+                                    pub_count           = PubCount,
+                                    module              = Module,
+                                    callback_args       = Args,
+                                    broadcast_buffer    = Buffer,
+                                    broadcast_buffer_sz = BufferSize }) ->
+    PubCount1 = PubCount + 1,
+    {Module:handle_msg(Args, get_pid(Self), Msg),
+     State #state { pub_count           = PubCount1,
+                    broadcast_buffer    = [{PubCount1, Msg} | Buffer],
+                    broadcast_buffer_sz = BufferSize + SizeHint}}.
+
+%% The Erlang distribution mechanism has an interesting quirk - it
+%% will kill the VM cold with "Absurdly large distribution output data
+%% buffer" if you attempt to send a message which serialises out to
+%% more than 2^31 bytes in size. It's therefore a very good idea to
+%% make sure that we don't exceed that size!
+%%
+%% Now, we could figure out the size of messages as they come in using
+%% size(term_to_binary(Msg)) or similar. The trouble is, that requires
+%% us to serialise the message only to throw the serialised form
+%% away. Hard to believe that's a sensible thing to do. So instead we
+%% accept a size hint from the application, via broadcast/3. This size
+%% hint can be the size of anything in the message which we expect
+%% could be large, and we just ignore the size of any small bits of
+%% the message term. Therefore MAX_BUFFER_SIZE is set somewhat
+%% conservatively at 100MB - but the buffer is only to allow us to
+%% buffer tiny messages anyway, so 100MB is plenty.
+
+%% Flush early once the accumulated size hints exceed ?MAX_BUFFER_SIZE
+%% (see the commentary above on distribution buffer limits).
+maybe_flush_broadcast_buffer(State = #state{broadcast_buffer_sz = Size}) ->
+    case Size > ?MAX_BUFFER_SIZE of
+        true  -> flush_broadcast_buffer(State);
+        false -> State
+    end.
+
+%% Send all buffered pubs rightwards as a single activity and append
+%% them to our own pending-ack queue; they stay pending until their
+%% acks come back around the ring.
+flush_broadcast_buffer(State = #state { broadcast_buffer = [] }) ->
+    State;
+flush_broadcast_buffer(State = #state { self          = Self,
+                                        members_state = MembersState,
+                                        broadcast_buffer = Buffer,
+                                        pub_count     = PubCount }) ->
+    %% The buffer is built newest-first, so its head must carry the
+    %% current pub_count.
+    [{PubCount, _Msg}|_] = Buffer, %% ASSERTION match on PubCount
+    Pubs = lists:reverse(Buffer),
+    Activity = activity_cons(Self, Pubs, [], activity_nil()),
+    ok = maybe_send_activity(activity_finalise(Activity), State),
+    MembersState1 = with_member(
+                      fun (Member = #member { pending_ack = PA }) ->
+                              PA1 = queue:join(PA, queue:from_list(Pubs)),
+                              Member #member { pending_ack = PA1,
+                                               last_pub = PubCount }
+                      end, Self, MembersState),
+    State #state { members_state       = MembersState1,
+                   broadcast_buffer    = [],
+                   broadcast_buffer_sz = 0 }.
+
+%% ---------------------------------------------------------------------------
+%% View construction and inspection
+%% ---------------------------------------------------------------------------
+
+%% A view is {Version, Map} where the map goes from alive member id to
+%% its #view_member{} (neighbours plus absorbed dead aliases).
+
+%% True if the group record's version is newer than our view's.
+needs_view_update(ReqVer, {Ver, _View}) -> Ver < ReqVer.
+
+view_version({Ver, _View}) -> Ver.
+
+%% Dead members are stored in the group record as {dead, Member}.
+is_member_alive({dead, _Member}) -> false;
+is_member_alive(_)               -> true.
+
+%% Is Member either Self or one of the dead members Self has absorbed?
+is_member_alias(Self, Self, _View) ->
+    true;
+is_member_alias(Member, Self, View) ->
+    ?SETS:is_element(Member,
+                     ((fetch_view_member(Self, View)) #view_member.aliases)).
+
+dead_member_id({dead, Member}) -> Member.
+
+store_view_member(VMember = #view_member { id = Id }, {Ver, View}) ->
+    {Ver, maps:put(Id, VMember, View)}.
+
+%% Apply Fun to the stored #view_member{} for Id and store the result.
+with_view_member(Fun, View, Id) ->
+    store_view_member(Fun(fetch_view_member(Id, View)), View).
+
+fetch_view_member(Id, {_Ver, View}) -> maps:get(Id, View).
+
+find_view_member(Id, {_Ver, View}) -> maps:find(Id, View).
+
+blank_view(Ver) -> {Ver, maps:new()}.
+
+%% The view's keys are exactly the alive members.
+alive_view_members({_Ver, View}) -> maps:keys(View).
+
+%% All alive members plus every dead member they alias.
+all_known_members({_Ver, View}) ->
+    maps:fold(
+      fun (Member, #view_member { aliases = Aliases }, Acc) ->
+              ?SETS:to_list(Aliases) ++ [Member | Acc]
+      end, [], View).
+
+%% Build a view (ring topology plus alias sets) from the persisted
+%% group record. Only alive members become ring nodes; runs of dead
+%% members are attached as aliases to the next alive member.
+group_to_view(#gm_group { members = Members, version = Ver }) ->
+    Alive = lists:filter(fun is_member_alive/1, Members),
+    [_|_] = Alive, %% ASSERTION - can't have all dead members
+    %% Triplicating Alive gives link_view/2 a full (Left, Middle,
+    %% Right) window for every member, wrap-around included; it stops
+    %% after one complete lap.
+    add_aliases(link_view(Alive ++ Alive ++ Alive, blank_view(Ver)), Members).
+
+%% Walk consecutive (Left, Middle, Right) triples, recording each
+%% member's neighbours; stop on the first member already stored, i.e.
+%% once we have gone all the way around the ring.
+link_view([Left, Middle, Right | Rest], View) ->
+    case find_view_member(Middle, View) of
+        error ->
+            link_view(
+              [Middle, Right | Rest],
+              store_view_member(#view_member { id      = Middle,
+                                               aliases = ?SETS:new(),
+                                               left    = Left,
+                                               right   = Right }, View));
+        {ok, _} ->
+            View
+    end;
+link_view(_, View) ->
+    View.
+
+%% Accumulate each run of dead members and union it into the aliases
+%% of the next alive member. The members list is rotated so it ends
+%% with an alive member, guaranteeing the accumulator is flushed
+%% (asserted empty at the end).
+add_aliases(View, Members) ->
+    Members1 = ensure_alive_suffix(Members),
+    {EmptyDeadSet, View1} =
+        lists:foldl(
+          fun (Member, {DeadAcc, ViewAcc}) ->
+                  case is_member_alive(Member) of
+                      true ->
+                          {?SETS:new(),
+                           with_view_member(
+                             fun (VMember =
+                                      #view_member { aliases = Aliases }) ->
+                                     VMember #view_member {
+                                       aliases = ?SETS:union(Aliases, DeadAcc) }
+                             end, ViewAcc, Member)};
+                      false ->
+                          {?SETS:add_element(dead_member_id(Member), DeadAcc),
+                           ViewAcc}
+                  end
+          end, {?SETS:new(), View}, Members1),
+    0 = ?SETS:size(EmptyDeadSet), %% ASSERTION
+    View1.
+
+%% Rotate trailing dead members to the front so the list ends with an
+%% alive member. Terminates because at least one member is alive.
+ensure_alive_suffix(Members) ->
+    queue:to_list(ensure_alive_suffix1(queue:from_list(Members))).
+
+ensure_alive_suffix1(MembersQ) ->
+    {{value, Member}, MembersQ1} = queue:out_r(MembersQ),
+    case is_member_alive(Member) of
+        true  -> MembersQ;
+        false -> ensure_alive_suffix1(queue:in_r(Member, MembersQ1))
+    end.
+
+
+%% ---------------------------------------------------------------------------
+%% View modification
+%% ---------------------------------------------------------------------------
+
+%% Join (or create) the group and return the resulting view: read the
+%% group record, creating/pruning it as needed, pick a random alive
+%% member as our prospective left and ask it to add us on its right.
+join_group(Self, GroupName, TxnFun) ->
+    join_group(Self, GroupName, dirty_read_group(GroupName), TxnFun).
+
+join_group(Self, GroupName, {error, not_found}, TxnFun) ->
+    join_group(Self, GroupName,
+               prune_or_create_group(Self, GroupName, TxnFun), TxnFun);
+%% We are the sole member already: nothing to negotiate.
+join_group(Self, _GroupName, #gm_group { members = [Self] } = Group, _TxnFun) ->
+    group_to_view(Group);
+join_group(Self, GroupName, #gm_group { members = Members } = Group, TxnFun) ->
+    case lists:member(Self, Members) of
+        true ->
+            group_to_view(Group);
+        false ->
+            case lists:filter(fun is_member_alive/1, Members) of
+                [] ->
+                    join_group(Self, GroupName,
+                               prune_or_create_group(Self, GroupName, TxnFun),
+                               TxnFun);
+                Alive ->
+                    Left = lists:nth(rand:uniform(length(Alive)), Alive),
+                    %% If the chosen neighbour turns out to be dead,
+                    %% record its death and retry the join.
+                    Handler =
+                        fun () ->
+                                join_group(
+                                  Self, GroupName,
+                                  record_dead_member_in_group(Self,
+                                      Left, GroupName, TxnFun, false),
+                                  TxnFun)
+                        end,
+                    try
+                        case neighbour_call(Left, {add_on_right, Self}) of
+                            {ok, Group1} -> group_to_view(Group1);
+                            not_ready    -> join_group(Self, GroupName, TxnFun)
+                        end
+                    catch
+                        %% Neighbour process already gone ...
+                        exit:{R, _}
+                          when R =:= noproc; R =:= normal; R =:= shutdown ->
+                            Handler();
+                        %% ... or its whole node went down.
+                        exit:{{R, _}, _}
+                          when R =:= nodedown; R =:= shutdown ->
+                            Handler()
+                    end
+            end
+    end.
+
+%% Non-transactional read of the group record; cheap but may be stale.
+dirty_read_group(GroupName) ->
+    case mnesia:dirty_read(?GROUP_TABLE, GroupName) of
+        []      -> {error, not_found};
+        [Group] -> Group
+    end.
+
+%% Transactional read; must run inside a TxnFun transaction.
+read_group(GroupName) ->
+    case mnesia:read({?GROUP_TABLE, GroupName}) of
+        []      -> {error, not_found};
+        [Group] -> Group
+    end.
+
+%% Transactional write; returns the written record for chaining.
+write_group(Group) -> mnesia:write(?GROUP_TABLE, Group, write), Group.
+
+%% Within TxnFun: (re)create the group containing only ourselves,
+%% unless the existing record still has at least one alive member.
+prune_or_create_group(Self, GroupName, TxnFun) ->
+    TxnFun(
+      fun () ->
+              GroupNew = #gm_group { name    = GroupName,
+                                     members = [Self],
+                                     version = get_version(Self) },
+              case read_group(GroupName) of
+                  {error, not_found} ->
+                      write_group(GroupNew);
+                  Group = #gm_group { members = Members } ->
+                      case lists:any(fun is_member_alive/1, Members) of
+                          true  -> Group;
+                          false -> write_group(GroupNew)
+                      end
+              end
+      end).
+
+%% Within TxnFun: mark Member as {dead, Member} in the group record
+%% and bump the version. With Verify = true we first require that we
+%% ourselves are still a member (throwing lost_membership otherwise);
+%% with false we only require that the group record exists.
+record_dead_member_in_group(Self, Member, GroupName, TxnFun, Verify) ->
+    Fun =
+        fun () ->
+                try
+                    Group = #gm_group { members = Members, version = Ver } =
+                        case Verify of
+                            true ->
+                                check_membership(Self, read_group(GroupName));
+                            false ->
+                                check_group(read_group(GroupName))
+                        end,
+                    case lists:splitwith(
+                           fun (Member1) -> Member1 =/= Member end, Members) of
+                        {_Members1, []} -> %% not found - already recorded dead
+                            Group;
+                        {Members1, [Member | Members2]} ->
+                            Members3 = Members1 ++ [{dead, Member} | Members2],
+                            write_group(Group #gm_group { members = Members3,
+                                                          version = Ver + 1 })
+                    end
+                catch
+                    lost_membership ->
+                        %% The transaction must not be abruptly crashed, but
+                        %% leave the gen_server to stop normally
+                        {error, lost_membership}
+                end
+        end,
+    handle_lost_membership_in_txn(TxnFun, Fun).
+
+%% Run Fun via TxnFun and convert a lost_membership signalled from
+%% inside the transaction back into a throw outside it, so the
+%% gen_server can stop normally instead of aborting the transaction.
+handle_lost_membership_in_txn(TxnFun, Fun) ->
+    case TxnFun(Fun) of
+        {error, lost_membership = T} ->
+            throw(T);
+        Any ->
+            Any
+    end.
+
+%% Within TxnFun: insert NewMember immediately to the right of Left in
+%% the group record, bumping the version. Left must still be a member
+%% (checked; throws lost_membership via the wrapper otherwise).
+record_new_member_in_group(NewMember, Left, GroupName, TxnFun) ->
+    Fun =
+        fun () ->
+                try
+                    Group = #gm_group { members = Members, version = Ver } =
+                        check_membership(Left, read_group(GroupName)),
+                    case lists:member(NewMember, Members) of
+                        true ->
+                            %% This avoids duplicates during partial partitions,
+                            %% as inconsistent views might happen during them
+                            rabbit_log:warning("(~p) GM avoiding duplicate of ~p",
+                                               [self(), NewMember]),
+                            Group;
+                        false ->
+                            {Prefix, [Left | Suffix]} =
+                                lists:splitwith(fun (M) -> M =/= Left end, Members),
+                            write_group(Group #gm_group {
+                                          members = Prefix ++ [Left, NewMember | Suffix],
+                                          version = Ver + 1 })
+                    end
+                catch
+                    lost_membership ->
+                        %% The transaction must not be abruptly crashed, but
+                        %% leave the gen_server to stop normally
+                        {error, lost_membership}
+                end
+        end,
+    handle_lost_membership_in_txn(TxnFun, Fun).
+
+%% Within TxnFun: remove the given (dead) member ids from the group
+%% record entirely, bumping the version only if anything was removed.
+erase_members_in_group(Self, Members, GroupName, TxnFun) ->
+    DeadMembers = [{dead, Id} || Id <- Members],
+    Fun =
+        fun () ->
+                try
+                    Group = #gm_group { members = [_|_] = Members1, version = Ver } =
+                        check_membership(Self, read_group(GroupName)),
+                    case Members1 -- DeadMembers of
+                        Members1 -> Group;
+                        Members2 -> write_group(
+                                      Group #gm_group { members = Members2,
+                                                        version = Ver + 1 })
+                    end
+                catch
+                    lost_membership ->
+                        %% The transaction must not be abruptly crashed, but
+                        %% leave the gen_server to stop normally
+                        {error, lost_membership}
+                end
+        end,
+    handle_lost_membership_in_txn(TxnFun, Fun).
+
+%% Drop any of our aliases (dead members we absorbed) whose pubs have
+%% all been acked: erase them from members_state and from the group
+%% record, then move to the recomputed view.
+maybe_erase_aliases(State = #state { self          = Self,
+                                     group_name    = GroupName,
+                                     members_state = MembersState,
+                                     txn_executor  = TxnFun }, View) ->
+    #view_member { aliases = Aliases } = fetch_view_member(Self, View),
+    {Erasable, MembersState1}
+        = ?SETS:fold(
+            fun (Id, {ErasableAcc, MembersStateAcc} = Acc) ->
+                    #member { last_pub = LP, last_ack = LA } =
+                        find_member_or_blank(Id, MembersState),
+                    case can_erase_view_member(Self, Id, LA, LP) of
+                        true  -> {[Id | ErasableAcc],
+                                  erase_member(Id, MembersStateAcc)};
+                        false -> Acc
+                    end
+            end, {[], MembersState}, Aliases),
+    View1 = case Erasable of
+                [] -> View;
+                _  -> group_to_view(
+                        erase_members_in_group(Self, Erasable, GroupName, TxnFun))
+            end,
+    change_view(View1, State #state { members_state = MembersState1 }).
+
+%% Never erase ourselves; erase an alias only once everything it
+%% published has been acked (last_ack =:= last_pub).
+can_erase_view_member(Self, Self, _LA, _LP) -> false;
+can_erase_view_member(_Self, _Id,    N,  N) -> true;
+can_erase_view_member(_Self, _Id,  _LA, _LP) -> false.
+
+%% Message a neighbour by its {Version, Pid} member id.
+neighbour_cast(N, Msg) -> ?INSTR_MOD:cast(get_pid(N), Msg).
+neighbour_call(N, Msg) -> ?INSTR_MOD:call(get_pid(N), Msg, infinity).
+
+%% ---------------------------------------------------------------------------
+%% View monitoring and maintenance
+%% ---------------------------------------------------------------------------
+
+%% Reconcile a stored neighbour {Member, MRef} with the neighbour the
+%% view says we should have, (de)monitoring and notifying as needed.
+%%
+%% Still alone, staying alone: nothing to do.
+ensure_neighbour(_Ver, Self, {Self, undefined}, Self) ->
+    {Self, undefined};
+%% Gaining a first real neighbour: tell it to check its own
+%% neighbours, and start monitoring it.
+ensure_neighbour(Ver, Self, {Self, undefined}, RealNeighbour) ->
+    ok = neighbour_cast(RealNeighbour, {?TAG, Ver, check_neighbours}),
+    {RealNeighbour, maybe_monitor(RealNeighbour, Self)};
+%% Neighbour unchanged: keep the existing monitor.
+ensure_neighbour(_Ver, _Self, {RealNeighbour, MRef}, RealNeighbour) ->
+    {RealNeighbour, MRef};
+%% Neighbour changed: drop the old monitor, nudge both the old and the
+%% new neighbour (unless the new one is ourselves), monitor the new.
+ensure_neighbour(Ver, Self, {RealNeighbour, MRef}, Neighbour) ->
+    true = ?INSTR_MOD:demonitor(MRef),
+    Msg = {?TAG, Ver, check_neighbours},
+    ok = neighbour_cast(RealNeighbour, Msg),
+    ok = case Neighbour of
+             Self -> ok;
+             _    -> neighbour_cast(Neighbour, Msg)
+         end,
+    {Neighbour, maybe_monitor(Neighbour, Self)}.
+
+%% We never monitor ourselves.
+maybe_monitor( Self,  Self) -> undefined;
+maybe_monitor(Other, _Self) -> ?INSTR_MOD:monitor(get_pid(Other)).
+
+%% Repair our left/right links and monitors to match the current view,
+%% and send a catchup to a newly acquired right neighbour.
+check_neighbours(State = #state { self             = Self,
+                                  left             = Left,
+                                  right            = Right,
+                                  view             = View,
+                                  broadcast_buffer = Buffer }) ->
+    #view_member { left = VLeft, right = VRight }
+        = fetch_view_member(Self, View),
+    Ver = view_version(View),
+    Left1 = ensure_neighbour(Ver, Self, Left, VLeft),
+    Right1 = ensure_neighbour(Ver, Self, Right, VRight),
+    %% If we have become our own right (singleton ring) there is
+    %% nowhere to send buffered pubs; clear the buffer.
+    Buffer1 = case Right1 of
+                  {Self, undefined} -> [];
+                  _                 -> Buffer
+              end,
+    State1 = State #state { left = Left1, right = Right1,
+                            broadcast_buffer = Buffer1 },
+    ok = maybe_send_catchup(Right, State1),
+    State1.
+
+%% Right neighbour unchanged: no catchup needed.
+maybe_send_catchup(Right, #state { right = Right }) ->
+    ok;
+%% We are our own right (singleton): nobody to catch up.
+maybe_send_catchup(_Right, #state { self  = Self,
+                                    right = {Self, undefined} }) ->
+    ok;
+%% We have no state yet, so nothing to send.
+maybe_send_catchup(_Right, #state { members_state = undefined }) ->
+    ok;
+%% New right neighbour: send it our full members state.
+maybe_send_catchup(_Right, #state { self          = Self,
+                                    right         = {Right, _MRef},
+                                    view          = View,
+                                    members_state = MembersState }) ->
+    send_right(Right, View,
+               {catchup, Self, prepare_members_state(MembersState)}).
+
+
+%% ---------------------------------------------------------------------------
+%% Catch_up delta detection
+%% ---------------------------------------------------------------------------
+
+%% Split queue A relative to queue B into {PrefixOfA, CommonRun,
+%% RemainderOfB}. Elements are {PubNum, Msg} pairs ordered by PubNum.
+find_prefix_common_suffix(A, B) ->
+    {Prefix, A1} = find_prefix(A, B, queue:new()),
+    {Common, Suffix} = find_common(A1, B, queue:new()),
+    {Prefix, Common, Suffix}.
+
+%% Returns the elements of A that occur before the first element of B,
+%% plus the remainder of A.
+find_prefix(A, B, Prefix) ->
+    case {queue:out(A), queue:out(B)} of
+        {{{value, Val}, _A1}, {{value, Val}, _B1}} ->
+            {Prefix, A};
+        {{empty, A1}, {{value, _A}, _B1}} ->
+            {Prefix, A1};
+        {{{value, {NumA, _MsgA} = Val}, A1},
+         {{value, {NumB, _MsgB}}, _B1}} when NumA < NumB ->
+            find_prefix(A1, B, queue:in(Val, Prefix));
+        {_, {empty, _B1}} ->
+            {A, Prefix} %% Prefix will be empty here
+    end.
+
+%% A should be a prefix of B. Returns the commonality plus the
+%% remainder of B.
+find_common(A, B, Common) ->
+    case {queue:out(A), queue:out(B)} of
+        {{{value, Val}, A1}, {{value, Val}, B1}} ->
+            find_common(A1, B1, queue:in(Val, Common));
+        {{empty, _A}, _} ->
+            {Common, B};
+        %% Drop value from B.
+        %% Match value to avoid infinite loop, since {empty, B} = queue:out(B).
+        {_, {{value, _}, B1}} ->
+            find_common(A, B1, Common);
+        %% Drop value from A. Empty A should be matched by second clause.
+        {{{value, _}, A1}, _} ->
+            find_common(A1, B, Common)
+    end.
+
+
+%% ---------------------------------------------------------------------------
+%% Members helpers
+%% ---------------------------------------------------------------------------
+
+%% Apply Fun to Id's #member{} (created blank if absent), store back.
+with_member(Fun, Id, MembersState) ->
+    store_member(
+      Id, Fun(find_member_or_blank(Id, MembersState)), MembersState).
+
+%% As with_member/3, but Fun also threads an accumulator through.
+with_member_acc(Fun, Id, {MembersState, Acc}) ->
+    {MemberState, Acc1} = Fun(find_member_or_blank(Id, MembersState), Acc),
+    {store_member(Id, MemberState, MembersState), Acc1}.
+
+find_member_or_blank(Id, MembersState) ->
+    case maps:find(Id, MembersState) of
+        {ok, Result} -> Result;
+        error        -> blank_member()
+    end.
+
+erase_member(Id, MembersState) -> maps:remove(Id, MembersState).
+
+%% -1 sentinels mean "no pubs / acks seen yet" for this member.
+blank_member() ->
+    #member { pending_ack = queue:new(), last_pub = -1, last_ack = -1 }.
+
+blank_member_state() -> maps:new().
+
+store_member(Id, MemberState, MembersState) ->
+    maps:put(Id, MemberState, MembersState).
+
+%% Members state travels the ring as a plain {Id, Member} list.
+prepare_members_state(MembersState) -> maps:to_list(MembersState).
+
+build_members_state(MembersStateList) -> maps:from_list(MembersStateList).
+
+%% A member id is {GroupVersion, Pid}; ?VERSION_START when the group
+%% record does not exist yet.
+make_member(GroupName) ->
+   {case dirty_read_group(GroupName) of
+        #gm_group { version = Version } -> Version;
+        {error, not_found}              -> ?VERSION_START
+    end, self()}.
+
+%% Rebuild members state keeping only members the view still knows,
+%% blanking any that were absent.
+remove_erased_members(MembersState, View) ->
+    lists:foldl(fun (Id, MembersState1) ->
+                    store_member(Id, find_member_or_blank(Id, MembersState),
+                                 MembersState1)
+                end, blank_member_state(), all_known_members(View)).
+
+get_version({Version, _Pid}) -> Version.
+
+get_pid({_Version, Pid}) -> Pid.
+
+get_pids(Ids) -> [Pid || {_Version, Pid} <- Ids].
+
+%% ---------------------------------------------------------------------------
+%% Activity assembly
+%% ---------------------------------------------------------------------------
+
+%% Activity is assembled as a queue of {Sender, Pubs, Acks} triples.
+activity_nil() -> queue:new().
+
+%% Empty contributions are dropped rather than enqueued.
+activity_cons( _Id,   [],   [], Tail) -> Tail;
+activity_cons(Sender, Pubs, Acks, Tail) -> queue:in({Sender, Pubs, Acks}, Tail).
+
+activity_finalise(Activity) -> queue:to_list(Activity).
+
+%% Forward non-empty activity to our right-hand neighbour.
+maybe_send_activity([], _State) ->
+    ok;
+maybe_send_activity(Activity, #state { self  = Self,
+                                       right = {Right, _MRefR},
+                                       view  = View }) ->
+    send_right(Right, View, {activity, Self, Activity}).
+
+%% All ring traffic is wrapped as {?TAG, ViewVersion, Msg} so the
+%% receiver can relate it to a particular view generation.
+send_right(Right, View, Msg) ->
+    ok = neighbour_cast(Right, {?TAG, view_version(View), Msg}).
+
+%% Fold incoming activity into our members state. For ids we alias
+%% (including ourselves), incoming pubs have completed the round trip:
+%% they are turned into acks and the publishers' confirms are
+%% answered. For all other members we extend their pending queues and
+%% relay their pubs/acks unchanged.
+calculate_activity(MembersState, Confirms, Activity, Self, View) ->
+    lists:foldl(
+      fun ({Id, Pubs, Acks}, MembersStateConfirmsActivity) ->
+              with_member_acc(
+                fun (Member = #member { pending_ack = PA,
+                                        last_pub    = LP,
+                                        last_ack    = LA },
+                     {Confirms2, Activity2}) ->
+                        case is_member_alias(Id, Self, View) of
+                            true ->
+                                %% Our own pubs came back around: the
+                                %% common run is what we can now ack.
+                                {ToAck, PA1} =
+                                    find_common(queue_from_pubs(Pubs), PA,
+                                                queue:new()),
+                                LA1 = last_ack(Acks, LA),
+                                AckNums = acks_from_queue(ToAck),
+                                Confirms3 = maybe_confirm(
+                                              Self, Id, Confirms2, AckNums),
+                                {Member #member { pending_ack = PA1,
+                                                  last_ack    = LA1 },
+                                 {Confirms3,
+                                  activity_cons(
+                                    Id, [], AckNums, Activity2)}};
+                            false ->
+                                PA1 = apply_acks(Acks, join_pubs(PA, Pubs)),
+                                LA1 = last_ack(Acks, LA),
+                                LP1 = last_pub(Pubs, LP),
+                                {Member #member { pending_ack = PA1,
+                                                  last_pub    = LP1,
+                                                  last_ack    = LA1 },
+                                 {Confirms2,
+                                  activity_cons(Id, Pubs, Acks, Activity2)}}
+                        end
+                end, Id, MembersStateConfirmsActivity)
+      end, {MembersState, {Confirms, activity_nil()}}, Activity).
+
+%% Deliver every pub in Activity to the callback module's
+%% handle_msg/3, threading any {become, Module, Args} module swap the
+%% callback requests and aborting on {stop, Reason}. Returns
+%% ok | {become, Module, Args} | {stop, Reason}.
+callback(Args, Module, Activity) ->
+    Result =
+      lists:foldl(
+        fun ({Id, Pubs, _Acks}, {Args1, Module1, ok}) ->
+                lists:foldl(fun ({_PubNum, Pub}, Acc = {Args2, Module2, ok}) ->
+                                    case Module2:handle_msg(
+                                           Args2, get_pid(Id), Pub) of
+                                        ok ->
+                                            Acc;
+                                        {become, Module3, Args3} ->
+                                            {Args3, Module3, ok};
+                                        {stop, _Reason} = Error ->
+                                            Error
+                                    end;
+                                (_, Error = {stop, _Reason}) ->
+                                    Error
+                            end, {Args1, Module1, ok}, Pubs);
+            (_, Error = {stop, _Reason}) ->
+                Error
+        end, {Args, Module, ok}, Activity),
+    %% Only report a 'become' if the module/args actually changed.
+    case Result of
+        {Args, Module, ok}      -> ok;
+        {Args1, Module1, ok}    -> {become, Module1, Args1};
+        {stop, _Reason} = Error -> Error
+    end.
+
+%% Switch to a new view: report member births/deaths to the callback
+%% module, then repair our neighbour links and monitors.
+change_view(View, State = #state { view          = View0,
+                                   module        = Module,
+                                   callback_args = Args }) ->
+    OldMembers = all_known_members(View0),
+    NewMembers = all_known_members(View),
+    Births = NewMembers -- OldMembers,
+    Deaths = OldMembers -- NewMembers,
+    Result = case {Births, Deaths} of
+                 {[], []} -> ok;
+                 _        -> Module:members_changed(
+                               Args, get_pids(Births), get_pids(Deaths))
+             end,
+    {Result, check_neighbours(State #state { view = View })}.
+
+%% Convert a {Result[, Reply], State} from view/activity handling into
+%% the appropriate gen_server2 return, honouring stop requests.
+handle_callback_result({Result, State}) ->
+    if_callback_success(
+      Result, fun no_reply_true/3, fun no_reply_false/3, undefined, State);
+handle_callback_result({Result, Reply, State}) ->
+    if_callback_success(
+      Result, fun reply_true/3, fun reply_false/3, Reply, State).
+
+%% Success/failure continuations passed to if_callback_success/5: the
+%% *_true variants carry on (noreply / reply / dispatch / run
+%% callbacks); the *_false variants turn a {stop, Reason} into the
+%% corresponding gen_server2 stop tuple.
+no_reply_true (_Result,        _Undefined, State) -> noreply(State).
+no_reply_false({stop, Reason}, _Undefined, State) -> {stop, Reason, State}.
+
+reply_true (_Result,        Reply, State) -> reply(Reply, State).
+reply_false({stop, Reason}, Reply, State) -> {stop, Reason, Reply, State}.
+
+handle_msg_true (_Result, Msg, State) -> handle_msg(Msg, State).
+handle_msg_false(Result, _Msg, State) -> {Result, State}.
+
+activity_true(_Result, Activity, State = #state { module        = Module,
+                                                  callback_args = Args }) ->
+    {callback(Args, Module, Activity), State}.
+activity_false(Result, _Activity, State) ->
+    {Result, State}.
+
+%% Route a callback result through maybe_stop/2 (which may defer or
+%% force a stop), then dispatch to the True/False continuation.
+if_callback_success(Result, True, False, Arg, State) ->
+    {NewResult, NewState} = maybe_stop(Result, State),
+    if_callback_success1(NewResult, True, False, Arg, NewState).
+
+if_callback_success1(ok, True, _False, Arg, State) ->
+    True(ok, Arg, State);
+%% The callback asked to swap implementation module.
+if_callback_success1(
+  {become, Module, Args} = Result, True, _False, Arg, State) ->
+    True(Result, Arg, State #state { module        = Module,
+                                     callback_args = Args });
+if_callback_success1({stop, _Reason} = Result, _True, False, Arg, State) ->
+    False(Result, Arg, State).
+
+%% Defer a requested stop while messages are still pending (buffered
+%% or unacked), recording the reason; once nothing is pending, stop
+%% with the recorded reason.
+maybe_stop({stop, Reason}, #state{ shutting_down = false } = State) ->
+    ShuttingDown = {true, Reason},
+    case has_pending_messages(State) of
+        true  -> {ok, State #state{ shutting_down = ShuttingDown }};
+        false -> {{stop, Reason}, State #state{ shutting_down = ShuttingDown }}
+    end;
+maybe_stop(Result, #state{ shutting_down = false } = State) ->
+    {Result, State};
+maybe_stop(Result, #state{ shutting_down = {true, Reason} } = State) ->
+    case has_pending_messages(State) of
+        true  -> {Result, State};
+        false -> {{stop, Reason}, State}
+    end.
+
+%% True iff this member still has in-flight work: either buffered,
+%% not-yet-broadcast pubs, or some member with pubs that have not all
+%% been acked. Used by maybe_stop/2 to delay shutdown until nothing
+%% can be lost.
+has_pending_messages(#state{ broadcast_buffer = Buffer })
+  when Buffer =/= [] ->
+    true;
+%% Scan for any pub/ack mismatch. lists:any/2 short-circuits on the
+%% first mismatch instead of materialising a whole filtered map only
+%% to test its size, as the previous maps:filter/2 version did.
+has_pending_messages(#state{ members_state = MembersState }) ->
+    lists:any(fun (#member{last_pub = LP, last_ack = LA}) -> LP =/= LA end,
+              maps:values(MembersState)).
+
+%% Answer broadcast confirms for our own pub numbers as their acks
+%% complete the round trip. Confirms is a queue of {PubNum, From}
+%% ordered by PubNum; numbers below the head carry no waiter and are
+%% skipped. Only acks for ourselves (Id =:= Self) are relevant.
+maybe_confirm(_Self, _Id, Confirms, []) ->
+    Confirms;
+maybe_confirm(Self, Self, Confirms, [PubNum | PubNums]) ->
+    case queue:out(Confirms) of
+        {empty, _Confirms} ->
+            Confirms;
+        {{value, {PubNum, From}}, Confirms1} ->
+            gen_server2:reply(From, ok),
+            maybe_confirm(Self, Self, Confirms1, PubNums);
+        %% Ack precedes the earliest awaited confirm: nothing to do.
+        {{value, {PubNum1, _From}}, _Confirms} when PubNum1 > PubNum ->
+            maybe_confirm(Self, Self, Confirms, PubNums)
+    end;
+maybe_confirm(_Self, _Id, Confirms, _PubNums) ->
+    Confirms.
+
+%% Answer every outstanding confirm with ok and reset the queue.
+purge_confirms(Confirms) ->
+    _ = [gen_server2:reply(From, ok) || {_PubNum, From} <- queue:to_list(Confirms)],
+    queue:new().
+
+
+%% ---------------------------------------------------------------------------
+%% Msg transformation
+%% ---------------------------------------------------------------------------
+
+%% Extract just the publication numbers from a queue of {PubNum, Msg}.
+acks_from_queue(Q) ->
+    lists:map(fun ({PubNum, _Msg}) -> PubNum end, queue:to_list(Q)).
+
+%% A pubs list is simply the queue in list form.
+pubs_from_queue(Q) ->
+    queue:to_list(Q).
+
+%% Inverse of pubs_from_queue/1.
+queue_from_pubs(Pubs) ->
+    queue:from_list(Pubs).
+
+%% Drop one pending pub per ack received; acks always cover a prefix
+%% of the pending queue.
+apply_acks(Acks, Pubs) ->
+    case Acks of
+        [] -> Pubs;
+        _  -> {_Acked, Remaining} = queue:split(length(Acks), Pubs),
+              Remaining
+    end.
+
+%% Append freshly received pubs onto the pending queue.
+join_pubs(Q, Pubs) ->
+    case Pubs of
+        [] -> Q;
+        _  -> queue:join(Q, queue_from_pubs(Pubs))
+    end.
+
+%% New last-ack number; must strictly advance (asserted).
+last_ack(AckNums, LA) ->
+    case AckNums of
+        [] -> LA;
+        _  -> Newest = lists:last(AckNums),
+              true = Newest > LA, %% ASSERTION
+              Newest
+    end.
+
+%% New last-pub number; must strictly advance (asserted).
+last_pub(Pubs, LP) ->
+    case Pubs of
+        [] -> LP;
+        _  -> {PubNum, _Msg} = lists:last(Pubs),
+              true = PubNum > LP, %% ASSERTION
+              PubNum
+    end.
+
+%% ---------------------------------------------------------------------------
+
+%% Uninstrumented versions
+
+%% Plain delegates to gen_server2/erlang; these are the targets of the
+%% ?INSTR_MOD calls (macro defined earlier in the file — see
+%% neighbour_cast/2) when no instrumentation is substituted.
+call(Pid, Msg, Timeout) -> gen_server2:call(Pid, Msg, Timeout).
+cast(Pid, Msg)          -> gen_server2:cast(Pid, Msg).
+monitor(Pid)            -> erlang:monitor(process, Pid).
+demonitor(MRef)         -> erlang:demonitor(MRef).
+
+%% Transactional-context check: Self must appear in the given group
+%% record; otherwise we have been removed and throw lost_membership.
+%% Returns the group record for chaining.
+check_membership(Self, #gm_group{members = M} = Group) ->
+    case lists:member(Self, M) of
+        true ->
+            Group;
+        false ->
+            throw(lost_membership)
+    end;
+check_membership(_Self, {error, not_found}) ->
+    throw(lost_membership).
+
+%% Dirty-read check that this process is still a member of the group,
+%% matching on the pid component of the {Version, Pid} member ids.
+check_membership(GroupName) ->
+    case dirty_read_group(GroupName) of
+        #gm_group{members = M} ->
+            case lists:keymember(self(), 2, M) of
+                true ->
+                    ok;
+                false ->
+                    throw(lost_membership)
+            end;
+        {error, not_found} ->
+            throw(lost_membership)
+    end.
+
+%% A missing group record means we cannot be a member of it.
+check_group({error, not_found}) ->
+    throw(lost_membership);
+check_group(Any) ->
+    Any.
diff --git a/deps/rabbit/src/internal_user.erl b/deps/rabbit/src/internal_user.erl
new file mode 100644
index 0000000000..b2bdcb6785
--- /dev/null
+++ b/deps/rabbit/src/internal_user.erl
@@ -0,0 +1,216 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(internal_user).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([
+ new/0,
+ new/1,
+ record_version_to_use/0,
+ fields/0,
+ fields/1,
+ upgrade/1,
+ upgrade_to/2,
+ pattern_match_all/0,
+ get_username/1,
+ get_password_hash/1,
+ get_tags/1,
+ get_hashing_algorithm/1,
+ get_limits/1,
+ create_user/3,
+ set_password_hash/3,
+ set_tags/2,
+ update_limits/3,
+ clear_limits/1
+]).
+
+-define(record_version, internal_user_v2).
+
+-type(username() :: binary()).
+
+-type(password_hash() :: binary()).
+
+-type internal_user() :: internal_user_v1:internal_user_v1() | internal_user_v2().
+
+-record(internal_user, {
+ username :: username() | '_',
+ password_hash :: password_hash() | '_',
+ tags :: [atom()] | '_',
+ %% password hashing implementation module,
+ %% typically rabbit_password_hashing_* but can
+ %% come from a plugin
+ hashing_algorithm :: atom() | '_',
+ limits = #{} :: map() | '_'}).
+
+-type(internal_user_v2() ::
+ #internal_user{username :: username() | '_',
+ password_hash :: password_hash() | '_',
+ tags :: [atom()] | '_',
+ hashing_algorithm :: atom() | '_',
+ limits :: map()}).
+
+-type internal_user_pattern() :: internal_user_v1:internal_user_v1_pattern() |
+ internal_user_v2_pattern().
+
+-type internal_user_v2_pattern() :: #internal_user{
+ username :: username() | '_',
+ password_hash :: '_',
+ tags :: '_',
+ hashing_algorithm :: '_',
+ limits :: '_'
+ }.
+
+-export_type([username/0,
+ password_hash/0,
+ internal_user/0,
+ internal_user_v2/0,
+ internal_user_pattern/0,
+ internal_user_v2_pattern/0]).
+
+%% Blank user in whichever record version is currently in effect
+%% (v2 once the user_limits feature flag is enabled, v1 otherwise).
+-spec new() -> internal_user().
+new() ->
+    case record_version_to_use() of
+        ?record_version ->
+            #internal_user{
+                username = <<"">>,
+                password_hash = <<"">>,
+                tags = []
+            };
+        _ ->
+            internal_user_v1:new()
+    end.
+
+%% As new/0 but pre-setting either the hashing algorithm or the tags.
+-spec new(tuple()) -> internal_user().
+new({hashing_algorithm, HashingAlgorithm}) ->
+    case record_version_to_use() of
+        ?record_version ->
+            #internal_user{
+                username = <<"">>,
+                password_hash = <<"">>,
+                tags = [],
+                hashing_algorithm = HashingAlgorithm
+            };
+        _ ->
+            internal_user_v1:new({hashing_algorithm, HashingAlgorithm})
+    end;
+new({tags, Tags}) ->
+    case record_version_to_use() of
+        ?record_version ->
+            #internal_user{
+                username = <<"">>,
+                password_hash = <<"">>,
+                tags = Tags
+            };
+        _ ->
+            internal_user_v1:new({tags, Tags})
+    end.
+
+%% v2 records (with the limits field) are only safe once the
+%% user_limits feature flag is enabled; fall back to v1 otherwise.
+-spec record_version_to_use() -> internal_user_v1 | internal_user_v2.
+record_version_to_use() ->
+    case rabbit_feature_flags:is_enabled(user_limits) of
+        true  -> ?record_version;
+        false -> internal_user_v1:record_version_to_use()
+    end.
+
+%% Field names of the active record version, for Mnesia table setup
+%% and the like.
+-spec fields() -> list().
+fields() ->
+    case record_version_to_use() of
+        ?record_version -> fields(?record_version);
+        _               -> internal_user_v1:fields()
+    end.
+
+-spec fields(atom()) -> list().
+fields(?record_version) -> record_info(fields, internal_user);
+fields(Version)         -> internal_user_v1:fields(Version).
+
+%% Bring any stored user record up to the active record version.
+-spec upgrade(internal_user()) -> internal_user().
+upgrade(#internal_user{} = User) -> User;
+upgrade(OldUser)                 -> upgrade_to(record_version_to_use(), OldUser).
+
+-spec upgrade_to
+(internal_user_v2, internal_user()) -> internal_user_v2();
+(internal_user_v1, internal_user_v1:internal_user_v1()) -> internal_user_v1:internal_user_v1().
+
+upgrade_to(?record_version, #internal_user{} = User) ->
+    User;
+%% A v1 record shares the tag and the leading fields of v2, so
+%% appending the default (empty) limits map yields a valid v2 record.
+upgrade_to(?record_version, OldUser) ->
+    Fields = erlang:tuple_to_list(OldUser) ++ [#{}],
+    #internal_user{} = erlang:list_to_tuple(Fields);
+upgrade_to(Version, OldUser) ->
+    internal_user_v1:upgrade_to(Version, OldUser).
+
+%% Mnesia match pattern for all users in the active record version.
+-spec pattern_match_all() -> internal_user_pattern().
+pattern_match_all() ->
+    case record_version_to_use() of
+        ?record_version -> #internal_user{_ = '_'};
+        _               -> internal_user_v1:pattern_match_all()
+    end.
+
+%% Field accessors. Records that do not match the v2 shape are
+%% delegated to the v1 module.
+-spec get_username(internal_user()) -> username().
+get_username(#internal_user{username = Value}) -> Value;
+get_username(User) -> internal_user_v1:get_username(User).
+
+-spec get_password_hash(internal_user()) -> password_hash().
+get_password_hash(#internal_user{password_hash = Value}) -> Value;
+get_password_hash(User) -> internal_user_v1:get_password_hash(User).
+
+-spec get_tags(internal_user()) -> [atom()].
+get_tags(#internal_user{tags = Value}) -> Value;
+get_tags(User) -> internal_user_v1:get_tags(User).
+
+-spec get_hashing_algorithm(internal_user()) -> atom().
+get_hashing_algorithm(#internal_user{hashing_algorithm = Value}) -> Value;
+get_hashing_algorithm(User) -> internal_user_v1:get_hashing_algorithm(User).
+
+-spec get_limits(internal_user()) -> map().
+get_limits(#internal_user{limits = Value}) -> Value;
+get_limits(User) -> internal_user_v1:get_limits(User).
+
+%% Fully-populated user record in the active record version, with no
+%% tags and no limits.
+-spec create_user(username(), password_hash(), atom()) -> internal_user().
+create_user(Username, PasswordHash, HashingMod) ->
+    case record_version_to_use() of
+        ?record_version ->
+            #internal_user{username          = Username,
+                           password_hash     = PasswordHash,
+                           tags              = [],
+                           hashing_algorithm = HashingMod,
+                           limits            = #{}
+                          };
+        _ ->
+            internal_user_v1:create_user(Username, PasswordHash, HashingMod)
+    end.
+
+%% The hash and the algorithm that produced it are always set together.
+-spec set_password_hash(internal_user(), password_hash(), atom()) -> internal_user().
+set_password_hash(#internal_user{} = User, PasswordHash, HashingAlgorithm) ->
+    User#internal_user{password_hash     = PasswordHash,
+                       hashing_algorithm = HashingAlgorithm};
+set_password_hash(User, PasswordHash, HashingAlgorithm) ->
+    internal_user_v1:set_password_hash(User, PasswordHash, HashingAlgorithm).
+
+-spec set_tags(internal_user(), [atom()]) -> internal_user().
+set_tags(#internal_user{} = User, Tags) ->
+    User#internal_user{tags = Tags};
+set_tags(User, Tags) ->
+    internal_user_v1:set_tags(User, Tags).
+
+%% add: merge a map of limits into the existing ones (overwriting on
+%% key clash); remove: delete a single limit by key.
+-spec update_limits
+(add, internal_user(), map()) -> internal_user();
+(remove, internal_user(), term()) -> internal_user().
+update_limits(add, #internal_user{limits = Limits} = User, Term) ->
+    User#internal_user{limits = maps:merge(Limits, Term)};
+update_limits(remove, #internal_user{limits = Limits} = User, LimitType) ->
+    User#internal_user{limits = maps:remove(LimitType, Limits)};
+update_limits(Action, User, Term) ->
+    internal_user_v1:update_limits(Action, User, Term).
+
+-spec clear_limits(internal_user()) -> internal_user().
+clear_limits(#internal_user{} = User) ->
+    User#internal_user{limits = #{}};
+clear_limits(User) ->
+    internal_user_v1:clear_limits(User).
diff --git a/deps/rabbit/src/internal_user_v1.erl b/deps/rabbit/src/internal_user_v1.erl
new file mode 100644
index 0000000000..edb956436f
--- /dev/null
+++ b/deps/rabbit/src/internal_user_v1.erl
@@ -0,0 +1,151 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(internal_user_v1).
+
+%% Original (v1) representation of an internal user record. It has no
+%% per-user limits field; the internal_user module delegates to this
+%% module for records still in the old format.
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([
+  new/0,
+  new/1,
+  record_version_to_use/0,
+  fields/0,
+  fields/1,
+  upgrade/1,
+  upgrade_to/2,
+  pattern_match_all/0,
+  get_username/1,
+  get_password_hash/1,
+  get_tags/1,
+  get_hashing_algorithm/1,
+  get_limits/1,
+  create_user/3,
+  set_password_hash/3,
+  set_tags/2,
+  update_limits/3,
+  clear_limits/1
+]).
+
+-define(record_version, ?MODULE).
+
+%% The '_' alternatives allow the same record definition to be used for
+%% match patterns (see pattern_match_all/0).
+-record(internal_user, {
+    username :: internal_user:username() | '_',
+    password_hash :: internal_user:password_hash() | '_',
+    tags :: [atom()] | '_',
+    %% password hashing implementation module,
+    %% typically rabbit_password_hashing_* but can
+    %% come from a plugin
+    hashing_algorithm :: atom() | '_'}).
+
+-type internal_user() :: internal_user_v1().
+
+-type(internal_user_v1() ::
+        #internal_user{username       :: internal_user:username(),
+                       password_hash  :: internal_user:password_hash(),
+                       tags           :: [atom()],
+                       hashing_algorithm :: atom()}).
+
+-type internal_user_pattern() :: internal_user_v1_pattern().
+
+-type internal_user_v1_pattern() :: #internal_user{
+                                       username :: internal_user:username() | '_',
+                                       password_hash :: '_',
+                                       tags :: '_',
+                                       hashing_algorithm :: '_'
+                                      }.
+
+-export_type([internal_user/0,
+              internal_user_v1/0,
+              internal_user_pattern/0,
+              internal_user_v1_pattern/0]).
+
+%% Identifies the record version this module implements (the module
+%% name itself, per the ?record_version macro).
+-spec record_version_to_use() -> internal_user_v1.
+record_version_to_use() ->
+    ?record_version.
+
+%% Creates a blank user; hashing_algorithm is left undefined.
+-spec new() -> internal_user().
+new() ->
+    #internal_user{
+        username = <<"">>,
+        password_hash = <<"">>,
+        tags = []
+    }.
+
+%% Creates a blank user seeded with either a hashing algorithm or a tag
+%% list, depending on the tagged tuple supplied.
+-spec new(tuple()) -> internal_user().
+new({hashing_algorithm, HashingAlgorithm}) ->
+    #internal_user{
+        username = <<"">>,
+        password_hash = <<"">>,
+        hashing_algorithm = HashingAlgorithm,
+        tags = []
+    };
+new({tags, Tags}) ->
+    #internal_user{
+        username = <<"">>,
+        password_hash = <<"">>,
+        tags = Tags
+    }.
+
+%% Record field names for this record version.
+-spec fields() -> list().
+fields() -> fields(?record_version).
+
+%% Only this module's own record version is accepted.
+-spec fields(atom()) -> list().
+fields(?record_version) -> record_info(fields, internal_user).
+
+%% v1 is the oldest version, so upgrading is the identity.
+-spec upgrade(internal_user()) -> internal_user().
+upgrade(#internal_user{} = User) -> User.
+
+%% The only upgrade target from v1 within this module is v1 itself.
+-spec upgrade_to(internal_user_v1, internal_user()) -> internal_user().
+upgrade_to(?record_version, #internal_user{} = User) ->
+    User.
+
+%% Match pattern that matches every internal_user record.
+-spec pattern_match_all() -> internal_user_pattern().
+pattern_match_all() -> #internal_user{_ = '_'}.
+
+-spec get_username(internal_user()) -> internal_user:username().
+get_username(#internal_user{username = Value}) -> Value.
+
+-spec get_password_hash(internal_user()) -> internal_user:password_hash().
+get_password_hash(#internal_user{password_hash = Value}) -> Value.
+
+-spec get_tags(internal_user()) -> [atom()].
+get_tags(#internal_user{tags = Value}) -> Value.
+
+-spec get_hashing_algorithm(internal_user()) -> atom().
+get_hashing_algorithm(#internal_user{hashing_algorithm = Value}) -> Value.
+
+%% v1 records carry no limits, so this is always the empty map.
+-spec get_limits(internal_user()) -> map().
+get_limits(_User) -> #{}.
+
+-spec create_user(internal_user:username(), internal_user:password_hash(),
+                  atom()) -> internal_user().
+create_user(Username, PasswordHash, HashingMod) ->
+    #internal_user{username = Username,
+                   password_hash = PasswordHash,
+                   tags = [],
+                   hashing_algorithm = HashingMod
+                  }.
+
+-spec set_password_hash(internal_user:internal_user(),
+                        internal_user:password_hash(), atom()) -> internal_user().
+set_password_hash(#internal_user{} = User, PasswordHash, HashingAlgorithm) ->
+    User#internal_user{password_hash = PasswordHash,
+                       hashing_algorithm = HashingAlgorithm}.
+
+-spec set_tags(internal_user(), [atom()]) -> internal_user().
+set_tags(#internal_user{} = User, Tags) ->
+    User#internal_user{tags = Tags}.
+
+%% No-op for v1 records: there is no limits field to update.
+-spec update_limits
+(add, internal_user(), map()) -> internal_user();
+(remove, internal_user(), term()) -> internal_user().
+update_limits(_, User, _) ->
+    User.
+
+%% No-op for v1 records: there is no limits field to clear.
+-spec clear_limits(internal_user()) -> internal_user().
+clear_limits(User) ->
+    User.
diff --git a/deps/rabbit/src/lager_exchange_backend.erl b/deps/rabbit/src/lager_exchange_backend.erl
new file mode 100644
index 0000000000..cd96f2230e
--- /dev/null
+++ b/deps/rabbit/src/lager_exchange_backend.erl
@@ -0,0 +1,233 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% @doc RabbitMQ backend for lager.
+%% Configuration is a proplist with the following keys:
+%% <ul>
+%%    <li>`level' - log level to use</li>
+%%    <li>`formatter' - the module to use when formatting log messages. Defaults to
+%%        `lager_default_formatter'</li>
+%%    <li>`formatter_config' - the format configuration string. Defaults to
+%%        `time [ severity ] message'</li>
+%% </ul>
+
+-module(lager_exchange_backend).
+
+-behaviour(gen_event).
+
+-export([init/1, terminate/2, code_change/3,
+         handle_call/2, handle_event/2, handle_info/2]).
+
+-export([maybe_init_exchange/0]).
+
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+
+-include_lib("lager/include/lager.hrl").
+
+%% Handler state:
+%%   level            - lager level mask a message must match to be published
+%%   formatter/format_config - module + config used to render each message
+%%   init_exchange_ts - monotonic second of the last attempt to declare the
+%%                      log exchange (used to rate-limit retries)
+%%   exchange         - resource of the declared exchange, or 'undefined'
+%%                      until declaration succeeds
+-record(state, {level :: {'mask', integer()},
+                formatter :: atom(),
+                format_config :: any(),
+                init_exchange_ts = undefined :: integer() | undefined,
+                exchange = undefined :: #resource{} | undefined}).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-compile([{parse_transform, lager_transform}]).
+-endif.
+
+%% Minimum number of seconds between attempts to declare the exchange.
+-define(INIT_EXCHANGE_INTERVAL_SECS, 5).
+-define(TERSE_FORMAT, [time, " [", severity, "] ", message]).
+-define(DEFAULT_FORMAT_CONFIG, ?TERSE_FORMAT).
+-define(FORMAT_CONFIG_OFF, []).
+
+-ifdef(TEST).
+-define(DEPRECATED(_Msg), ok).
+-else.
+-define(DEPRECATED(Msg),
+        io:format(user, "WARNING: This is a deprecated lager_exchange_backend configuration. Please use \"~w\" instead.~n", [Msg])).
+-endif.
+
+-define(LOG_EXCH_NAME, <<"amq.rabbitmq.log">>).
+
+%% gen_event callback. The first three clauses accept legacy
+%% configuration shapes and normalise them to a proplist.
+init([Level]) when is_atom(Level) ->
+    ?DEPRECATED([{level, Level}]),
+    init([{level, Level}]);
+init([Level, true]) when is_atom(Level) -> % for backwards compatibility
+    ?DEPRECATED([{level, Level}, {formatter_config, [{eol, "\\r\\n\\"}]}]),
+    init([{level, Level}, {formatter_config, ?FORMAT_CONFIG_OFF}]);
+init([Level, false]) when is_atom(Level) -> % for backwards compatibility
+    ?DEPRECATED([{level, Level}]),
+    init([{level, Level}]);
+
+init(Options) when is_list(Options) ->
+    true = validate_options(Options),
+    Level = get_option(level, Options, undefined),
+    try lager_util:config_to_mask(Level) of
+        L ->
+            DefaultOptions = [{formatter, lager_default_formatter},
+                              {formatter_config, ?DEFAULT_FORMAT_CONFIG}],
+            [Formatter, Config] = [get_option(K, Options, Default) || {K, Default} <- DefaultOptions],
+            State0 = #state{level=L,
+                            formatter=Formatter,
+                            format_config=Config},
+            % NB: this will probably always fail since the / vhost isn't available
+            State1 = maybe_init_exchange(State0),
+            {ok, State1}
+    catch
+        _:_ ->
+            {error, {fatal, bad_log_level}}
+    end;
+init(Level) when is_atom(Level) ->
+    ?DEPRECATED([{level, Level}]),
+    init([{level, Level}]);
+init(Other) ->
+    {error, {fatal, {bad_lager_exchange_backend_config, Other}}}.
+
+% rabbitmq/rabbitmq-server#1973
+% This is called immediately after the / vhost is created
+% or recovered. Only declares the exchange if this backend is
+% actually installed as a lager_event handler.
+maybe_init_exchange() ->
+    case lists:member(?MODULE, gen_event:which_handlers(lager_event)) of
+        true ->
+            _ = init_exchange(true),
+            ok;
+        _ ->
+            ok
+    end.
+
+%% Validates the configuration proplist; throws a fatal error tuple on
+%% the first invalid option, returns true otherwise.
+validate_options([]) -> true;
+validate_options([{level, L}|T]) when is_atom(L) ->
+    case lists:member(L, ?LEVELS) of
+        false ->
+            throw({error, {fatal, {bad_level, L}}});
+        true ->
+            validate_options(T)
+    end;
+validate_options([{formatter, M}|T]) when is_atom(M) ->
+    validate_options(T);
+validate_options([{formatter_config, C}|T]) when is_list(C) ->
+    validate_options(T);
+validate_options([H|_]) ->
+    throw({error, {fatal, {bad_lager_exchange_backend_config, H}}}).
+
+%% proplist lookup with a default for missing keys.
+get_option(K, Options, Default) ->
+    case lists:keyfind(K, 1, Options) of
+        {K, V} -> V;
+        false -> Default
+    end.
+
+handle_call(get_loglevel, #state{level=Level} = State) ->
+    {ok, Level, State};
+handle_call({set_loglevel, Level}, State) ->
+    try lager_util:config_to_mask(Level) of
+        Levels ->
+            {ok, ok, State#state{level=Levels}}
+    catch
+        _:_ ->
+            {ok, {error, bad_log_level}, State}
+    end;
+handle_call(_Request, State) ->
+    {ok, ok, State}.
+
+%% On every log event, first try (rate-limited) to declare the exchange
+%% if it is not available yet, then publish the message.
+handle_event({log, _Message} = Event, State0) ->
+    State1 = maybe_init_exchange(State0),
+    handle_log_event(Event, State1);
+handle_event(_Event, State) ->
+    {ok, State}.
+
+handle_info(_Info, State) ->
+    {ok, State}.
+
+terminate(_Reason, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%% @private
+handle_log_event({log, _Message}, #state{exchange=undefined} = State) ->
+    % NB: tried to define the exchange but still undefined,
+    % so not logging this message. Note: we can't log this dropped
+    % message because it will start an infinite loop
+    {ok, State};
+handle_log_event({log, Message},
+                 #state{level=L, exchange=LogExch,
+                        formatter=Formatter, format_config=FormatConfig} = State) ->
+    case lager_util:is_loggable(Message, L, ?MODULE) of
+        true ->
+            %% 0-9-1 says the timestamp is a "64 bit POSIX timestamp". That's
+            %% second resolution, not millisecond.
+            RoutingKey = rabbit_data_coercion:to_binary(lager_msg:severity(Message)),
+            Timestamp = os:system_time(seconds),
+            Node = rabbit_data_coercion:to_binary(node()),
+            Headers = [{<<"node">>, longstr, Node}],
+            AmqpMsg = #'P_basic'{content_type = <<"text/plain">>,
+                                 timestamp    = Timestamp,
+                                 headers      = Headers},
+            Body = rabbit_data_coercion:to_binary(Formatter:format(Message, FormatConfig)),
+            %% not_found means the exchange disappeared; that is fine,
+            %% a later event will re-declare it.
+            case rabbit_basic:publish(LogExch, RoutingKey, AmqpMsg, Body) of
+                ok -> ok;
+                {error, not_found} -> ok
+            end,
+            {ok, State};
+        false ->
+            {ok, State}
+    end.
+
+%% @private
+%% Ensures the exchange is declared, retrying at most once every
+%% ?INIT_EXCHANGE_INTERVAL_SECS seconds while it remains undefined.
+maybe_init_exchange(#state{exchange=undefined, init_exchange_ts=undefined} = State) ->
+    Now = erlang:monotonic_time(second),
+    handle_init_exchange(init_exchange(true), Now, State);
+maybe_init_exchange(#state{exchange=undefined, init_exchange_ts=Timestamp} = State) ->
+    Now = erlang:monotonic_time(second),
+    % NB: since we may try to declare the exchange on every log message, this ensures
+    % that we only try once every 5 seconds
+    HasEnoughTimeElapsed = Now - Timestamp > ?INIT_EXCHANGE_INTERVAL_SECS,
+    Result = init_exchange(HasEnoughTimeElapsed),
+    handle_init_exchange(Result, Now, State);
+maybe_init_exchange(State) ->
+    State.
+
+%% @private
+%% Declares the amq.rabbitmq.log topic exchange in the default vhost;
+%% returns {ok, undefined} when declaration fails (e.g. vhost not yet
+%% available) or when the argument is false (retry window not elapsed).
+init_exchange(true) ->
+    {ok, DefaultVHost} = application:get_env(rabbit, default_vhost),
+    Exchange = rabbit_misc:r(DefaultVHost, exchange, ?LOG_EXCH_NAME),
+    try
+        %% durable
+        #exchange{} = rabbit_exchange:declare(Exchange, topic, true, false, true, [], ?INTERNAL_USER),
+        rabbit_log:info("Declared exchange '~s' in vhost '~s'", [?LOG_EXCH_NAME, DefaultVHost]),
+        {ok, Exchange}
+    catch
+        ErrType:Err ->
+            rabbit_log:error("Could not declare exchange '~s' in vhost '~s', reason: ~p:~p",
+                             [?LOG_EXCH_NAME, DefaultVHost, ErrType, Err]),
+            {ok, undefined}
+    end;
+init_exchange(_) ->
+    {ok, undefined}.
+
+%% @private
+%% Records the attempt timestamp; stores the exchange only on success.
+handle_init_exchange({ok, undefined}, Now, State) ->
+    State#state{init_exchange_ts=Now};
+handle_init_exchange({ok, Exchange}, Now, State) ->
+    State#state{exchange=Exchange, init_exchange_ts=Now}.
+
+-ifdef(TEST).
+console_config_validation_test_() ->
+    Good = [{level, info}],
+    Bad1 = [{level, foo}],
+    Bad2 = [{larval, info}],
+    AllGood = [{level, info}, {formatter, my_formatter},
+               {formatter_config, ["blort", "garbage"]}],
+    [
+     ?_assertEqual(true, validate_options(Good)),
+     ?_assertThrow({error, {fatal, {bad_level, foo}}}, validate_options(Bad1)),
+     ?_assertThrow({error, {fatal, {bad_lager_exchange_backend_config, {larval, info}}}}, validate_options(Bad2)),
+     ?_assertEqual(true, validate_options(AllGood))
+    ].
+-endif.
diff --git a/deps/rabbit/src/lqueue.erl b/deps/rabbit/src/lqueue.erl
new file mode 100644
index 0000000000..1e267210d9
--- /dev/null
+++ b/deps/rabbit/src/lqueue.erl
@@ -0,0 +1,102 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2011-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(lqueue).
+
+%% lqueue implements a subset of Erlang's queue module. lqueues
+%% maintain their own length, so lqueue:len/1
+%% is an O(1) operation, in contrast with queue:len/1 which is O(n).
+%%
+%% Representation: {Length, queue:queue()} — every operation keeps the
+%% cached length in sync with the wrapped queue.
+
+-export([new/0, is_empty/1, len/1, in/2, in_r/2, out/1, out_r/1, join/2,
+         foldl/3, foldr/3, from_list/1, drop/1, to_list/1, peek/1, peek_r/1]).
+
+%% The underlying queue implementation, kept behind a macro so it can be
+%% swapped in one place.
+-define(QUEUE, queue).
+
+-export_type([
+              ?MODULE/0,
+              ?MODULE/1
+             ]).
+
+-opaque ?MODULE() :: ?MODULE(_).
+-opaque ?MODULE(T) :: {non_neg_integer(), queue:queue(T)}.
+-type value() :: any().
+-type result(T) :: 'empty' | {'value', T}.
+
+-spec new() -> ?MODULE(_).
+
+%% Creates an empty lqueue.
+new() -> {0, ?QUEUE:new()}.
+
+-spec drop(?MODULE(T)) -> ?MODULE(T).
+
+%% Removes the front element. Like queue:drop/1, this fails on an empty
+%% queue (the error comes from the wrapped call).
+drop({L, Q}) -> {L - 1, ?QUEUE:drop(Q)}.
+
+-spec is_empty(?MODULE(_)) -> boolean().
+
+%% O(1) thanks to the cached length.
+is_empty({0, _Q}) -> true;
+is_empty(_) -> false.
+
+-spec in(T, ?MODULE(T)) -> ?MODULE(T).
+
+%% Inserts V at the rear.
+in(V, {L, Q}) -> {L+1, ?QUEUE:in(V, Q)}.
+
+-spec in_r(value(), ?MODULE(T)) -> ?MODULE(T).
+
+%% Inserts V at the front.
+%% NOTE(review): this spec uses the untyped value() alias while in/2
+%% unifies the element type with T; consider tightening to in_r(T, ...).
+in_r(V, {L, Q}) -> {L+1, ?QUEUE:in_r(V, Q)}.
+
+-spec out(?MODULE(T)) -> {result(T), ?MODULE(T)}.
+
+%% Removes the front element; returns 'empty' (with the queue unchanged)
+%% when there is nothing to remove.
+out({0, _Q} = Q) -> {empty, Q};
+out({L, Q}) -> {Result, Q1} = ?QUEUE:out(Q),
+               {Result, {L-1, Q1}}.
+
+-spec out_r(?MODULE(T)) -> {result(T), ?MODULE(T)}.
+
+%% Removes the rear element; returns 'empty' when the queue is empty.
+out_r({0, _Q} = Q) -> {empty, Q};
+out_r({L, Q}) -> {Result, Q1} = ?QUEUE:out_r(Q),
+                 {Result, {L-1, Q1}}.
+
+-spec join(?MODULE(A), ?MODULE(B)) -> ?MODULE(A | B).
+
+%% Appends the second queue to the first; lengths are simply summed.
+join({L1, Q1}, {L2, Q2}) -> {L1 + L2, ?QUEUE:join(Q1, Q2)}.
+
+-spec to_list(?MODULE(T)) -> [T].
+
+to_list({_L, Q}) -> ?QUEUE:to_list(Q).
+
+-spec from_list([T]) -> ?MODULE(T).
+
+from_list(L) -> {length(L), ?QUEUE:from_list(L)}.
+
+-spec foldl(fun ((T, B) -> B), B, ?MODULE(T)) -> B.
+
+%% Folds front-to-rear, implemented via repeated out/1.
+foldl(Fun, Init, Q) ->
+    case out(Q) of
+        {empty, _Q} -> Init;
+        {{value, V}, Q1} -> foldl(Fun, Fun(V, Init), Q1)
+    end.
+
+-spec foldr(fun ((T, B) -> B), B, ?MODULE(T)) -> B.
+
+%% Folds rear-to-front, implemented via repeated out_r/1.
+foldr(Fun, Init, Q) ->
+    case out_r(Q) of
+        {empty, _Q} -> Init;
+        {{value, V}, Q1} -> foldr(Fun, Fun(V, Init), Q1)
+    end.
+
+-spec len(?MODULE(_)) -> non_neg_integer().
+
+%% O(1): returns the cached length.
+len({L, _}) -> L.
+
+-spec peek(?MODULE(T)) -> result(T).
+
+%% Returns (without removing) the front element, or 'empty'.
+peek({ 0, _Q}) -> empty;
+peek({_L, Q}) -> ?QUEUE:peek(Q).
+
+-spec peek_r(?MODULE(T)) -> result(T).
+
+%% Returns (without removing) the rear element, or 'empty'.
+peek_r({ 0, _Q}) -> empty;
+peek_r({_L, Q}) -> ?QUEUE:peek_r(Q).
diff --git a/deps/rabbit/src/mirrored_supervisor_sups.erl b/deps/rabbit/src/mirrored_supervisor_sups.erl
new file mode 100644
index 0000000000..b29d4d48e6
--- /dev/null
+++ b/deps/rabbit/src/mirrored_supervisor_sups.erl
@@ -0,0 +1,34 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2011-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(mirrored_supervisor_sups).
+
+%% supervisor2 callback module shared by the two supervisors that make
+%% up a mirrored supervisor: the "overall" supervisor and its
+%% "delegate" supervisor.
+
+-define(SUPERVISOR, supervisor2).
+-define(GS_MODULE, mirrored_supervisor).
+
+-behaviour(?SUPERVISOR).
+
+-export([init/1]).
+
+%%----------------------------------------------------------------------------
+
+%% Overall supervisor: if the wrapped init result is 'ignore', propagate
+%% it; otherwise supervise the delegate supervisor and the mirroring
+%% process under a one_for_all strategy (zero restarts allowed).
+init({overall, _Group, _TxFun, ignore}) -> ignore;
+init({overall, Group, TxFun, {ok, {Restart, ChildSpecs}}}) ->
+    %% Important: Delegate MUST start before Mirroring so that when we
+    %% shut down from above it shuts down last, so Mirroring does not
+    %% see it die.
+    %%
+    %% See comment in handle_info('DOWN', ...) in mirrored_supervisor
+    {ok, {{one_for_all, 0, 1},
+          [{delegate, {?SUPERVISOR, start_link, [?MODULE, {delegate, Restart}]},
+            temporary, 16#ffffffff, supervisor, [?SUPERVISOR]},
+           {mirroring, {?GS_MODULE, start_internal, [Group, TxFun, ChildSpecs]},
+            permanent, 16#ffffffff, worker, [?MODULE]}]}};
+
+
+%% Delegate supervisor: starts with the caller's restart strategy and no
+%% static children.
+init({delegate, Restart}) ->
+    {ok, {Restart, []}}.
diff --git a/deps/rabbit/src/pg_local.erl b/deps/rabbit/src/pg_local.erl
new file mode 100644
index 0000000000..263e743d1f
--- /dev/null
+++ b/deps/rabbit/src/pg_local.erl
@@ -0,0 +1,249 @@
+%% This file is a copy of pg2.erl from the R13B-3 Erlang/OTP
+%% distribution, with the following modifications:
+%%
+%% 1) Process groups are node-local only.
+%%
+%% 2) Groups are created/deleted implicitly.
+%%
+%% 3) 'join' and 'leave' are asynchronous.
+%%
+%% 4) the type specs of the exported non-callback functions have been
+%%    extracted into a separate, guarded section, and rewritten in
+%%    old-style spec syntax, for better compatibility with older
+%%    versions of Erlang/OTP. The remaining type specs have been
+%%    removed.
+
+%% All modifications are (C) 2010-2020 VMware, Inc. or its affiliates.
+
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1997-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at https://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(pg_local).
+
+-export([join/2, leave/2, get_members/1, in_group/2]).
+%% intended for testing only; not part of official API
+-export([sync/0, clear/0]).
+-export([start/0, start_link/0, init/1, handle_call/3, handle_cast/2,
+         handle_info/2, terminate/2]).
+
+%%----------------------------------------------------------------------------
+
+%% A group name may be any term.
+-type name() :: term().
+
+%%----------------------------------------------------------------------------
+
+-define(TABLE, pg_local_table).
+
+%%%
+%%% Exported functions
+%%%
+
+-spec start_link() -> {'ok', pid()} | {'error', any()}.
+
+start_link() ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+-spec start() -> {'ok', pid()} | {'error', any()}.
+
+%% Starts the server on demand (see ensure_started/0).
+start() ->
+    ensure_started().
+
+-spec join(name(), pid()) -> 'ok'.
+
+%% Asynchronously adds Pid to the group Name; the group is created
+%% implicitly if it does not exist.
+join(Name, Pid) when is_pid(Pid) ->
+    _ = ensure_started(),
+    gen_server:cast(?MODULE, {join, Name, Pid}).
+
+-spec leave(name(), pid()) -> 'ok'.
+
+%% Asynchronously removes one membership of Pid from the group Name.
+leave(Name, Pid) when is_pid(Pid) ->
+    _ = ensure_started(),
+    gen_server:cast(?MODULE, {leave, Name, Pid}).
+
+-spec get_members(name()) -> [pid()].
+
+%% Reads the member list directly from the (protected) ETS table; a pid
+%% appears once per join.
+get_members(Name) ->
+    _ = ensure_started(),
+    group_members(Name).
+
+-spec in_group(name(), pid()) -> boolean().
+
+in_group(Name, Pid) ->
+    _ = ensure_started(),
+    %% The join message is a cast and thus can race, but we want to
+    %% keep it that way to be fast in the common case.
+    case member_present(Name, Pid) of
+        true  -> true;
+        false -> sync(),
+                 member_present(Name, Pid)
+    end.
+
+-spec sync() -> 'ok'.
+
+%% Synchronisation barrier: by the time this call returns, all
+%% previously issued join/leave casts have been processed.
+sync() ->
+    _ = ensure_started(),
+    gen_server:call(?MODULE, sync, infinity).
+
+%% Testing helper: wipes all membership state.
+clear() ->
+    _ = ensure_started(),
+    gen_server:call(?MODULE, clear, infinity).
+
+%%%
+%%% Callback functions from gen_server
+%%%
+
+%% The server itself keeps no state; everything lives in the ETS table.
+-record(state, {}).
+
+init([]) ->
+    ?TABLE = ets:new(?TABLE, [ordered_set, protected, named_table]),
+    {ok, #state{}}.
+
+%% sync is a no-op call used purely as an ordering barrier.
+handle_call(sync, _From, S) ->
+    {reply, ok, S};
+
+handle_call(clear, _From, S) ->
+    ets:delete_all_objects(?TABLE),
+    {reply, ok, S};
+
+handle_call(Request, From, S) ->
+    error_logger:warning_msg("The pg_local server received an unexpected message:\n"
+                             "handle_call(~p, ~p, _)\n",
+                             [Request, From]),
+    {noreply, S}.
+
+handle_cast({join, Name, Pid}, S) ->
+    _ = join_group(Name, Pid),
+    {noreply, S};
+handle_cast({leave, Name, Pid}, S) ->
+    leave_group(Name, Pid),
+    {noreply, S};
+handle_cast(_, S) ->
+    {noreply, S}.
+
+%% A monitored member process died: clean up all of its memberships.
+handle_info({'DOWN', MonitorRef, process, Pid, _Info}, S) ->
+    member_died(MonitorRef, Pid),
+    {noreply, S};
+handle_info(_, S) ->
+    {noreply, S}.
+
+terminate(_Reason, _S) ->
+    true = ets:delete(?TABLE),
+    ok.
+
+%%%
+%%% Local functions
+%%%
+
+%%% One ETS table, pg_local_table, is used for bookkeeping. The type of the
+%%% table is ordered_set, and the fast matching of partially
+%%% instantiated keys is used extensively.
+%%%
+%%% {{ref, Pid}, MonitorRef, Counter}
+%%% {{ref, MonitorRef}, Pid}
+%%%    Each process has one monitor. Counter is incremented when the
+%%%    Pid joins some group.
+%%% {{member, Name, Pid}, GroupCounter}
+%%%    Pid is a member of group Name, GroupCounter is incremented when the
+%%%    Pid joins the group Name.
+%%% {{pid, Pid, Name}}
+%%%    Pid is a member of group Name.
+%% Invoked on a 'DOWN' message for a member's monitor: removes the dead
+%% process from every group it had joined. Cleanup is keyed on the Pid
+%% carried by the DOWN message rather than on the {ref, Ref} table
+%% entry, because that entry may already have been removed; the previous
+%% implementation looked the entry up but took the identical action on
+%% both outcomes, so the lookup has been dropped.
+member_died(_Ref, Pid) ->
+    leave_all_groups(Pid),
+    ok.
+
+%% Removes Pid from every group it belongs to, once per join, using the
+%% {pid, Pid, Name} index to enumerate its groups.
+leave_all_groups(Pid) ->
+    Names = member_groups(Pid),
+    _ = [leave_group(Name, P) ||
+            Name <- Names,
+            P <- member_in_group(Pid, Name)].
+
+%% Registers one membership. The {ref, Pid} row counts total joins
+%% across all groups and owns the single monitor on Pid; the
+%% {member, Name, Pid} row counts joins to this particular group.
+%% ets:update_counter/3 throws when the row does not exist yet, which is
+%% used here as the "first join" signal to create the rows.
+join_group(Name, Pid) ->
+    Ref_Pid = {ref, Pid},
+    try _ = ets:update_counter(?TABLE, Ref_Pid, {3, +1})
+    catch _:_ ->
+            Ref = erlang:monitor(process, Pid),
+            true = ets:insert(?TABLE, {Ref_Pid, Ref, 1}),
+            true = ets:insert(?TABLE, {{ref, Ref}, Pid})
+    end,
+    Member_Name_Pid = {member, Name, Pid},
+    try _ = ets:update_counter(?TABLE, Member_Name_Pid, {2, +1})
+    catch _:_ ->
+            true = ets:insert(?TABLE, {Member_Name_Pid, 1}),
+            true = ets:insert(?TABLE, {{pid, Pid, Name}})
+    end.
+
+%% Decrements the join counters, deleting the rows — and the monitor —
+%% once they reach zero. Leaving a group one is not a member of is
+%% silently ignored: the initial update_counter throws and the outer
+%% catch returns ok.
+leave_group(Name, Pid) ->
+    Member_Name_Pid = {member, Name, Pid},
+    try ets:update_counter(?TABLE, Member_Name_Pid, {2, -1}) of
+        N ->
+            if
+                N =:= 0 ->
+                    true = ets:delete(?TABLE, {pid, Pid, Name}),
+                    true = ets:delete(?TABLE, Member_Name_Pid);
+                true ->
+                    ok
+            end,
+            Ref_Pid = {ref, Pid},
+            case ets:update_counter(?TABLE, Ref_Pid, {3, -1}) of
+                0 ->
+                    [{Ref_Pid,Ref,0}] = ets:lookup(?TABLE, Ref_Pid),
+                    true = ets:delete(?TABLE, {ref, Ref}),
+                    true = ets:delete(?TABLE, Ref_Pid),
+                    true = erlang:demonitor(Ref, [flush]),
+                    ok;
+                _ ->
+                    ok
+            end
+    catch _:_ ->
+            ok
+    end.
+
+%% Members of Name, one list element per join (a process that joined N
+%% times appears N times).
+group_members(Name) ->
+    [P ||
+        [P, N] <- ets:match(?TABLE, {{member, Name, '$1'},'$2'}),
+        _ <- lists:seq(1, N)].
+
+%% Pid duplicated once per join to Name; fails (badmatch) if Pid is not
+%% a member of Name.
+member_in_group(Pid, Name) ->
+    [{{member, Name, Pid}, N}] = ets:lookup(?TABLE, {member, Name, Pid}),
+    lists:duplicate(N, Pid).
+
+%% True if Pid currently has at least one membership of Name.
+member_present(Name, Pid) ->
+    case ets:lookup(?TABLE, {member, Name, Pid}) of
+        [_] -> true;
+        []  -> false
+    end.
+
+%% All group names Pid belongs to (each name listed once, regardless of
+%% join count).
+member_groups(Pid) ->
+    [Name || [Name] <- ets:match(?TABLE, {{pid, Pid, '$1'}})].
+
+%% Starts the server as a child of kernel_safe_sup on first use;
+%% subsequent calls return the already-registered pid.
+ensure_started() ->
+    case whereis(?MODULE) of
+        undefined ->
+            C = {pg_local, {?MODULE, start_link, []}, permanent,
+                 16#ffffffff, worker, [?MODULE]},
+            supervisor:start_child(kernel_safe_sup, C);
+        PgLocalPid ->
+            {ok, PgLocalPid}
+    end.
diff --git a/deps/rabbit/src/rabbit.erl b/deps/rabbit/src/rabbit.erl
new file mode 100644
index 0000000000..9248c945dc
--- /dev/null
+++ b/deps/rabbit/src/rabbit.erl
@@ -0,0 +1,1511 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit).
+
+%% Transitional step until we can require Erlang/OTP 21 and
+%% use the now recommended try/catch syntax for obtaining the stack trace.
+-compile(nowarn_deprecated_function).
+
+-behaviour(application).
+
+-export([start/0, boot/0, stop/0,
+ stop_and_halt/0, await_startup/0, await_startup/1, await_startup/3,
+ status/0, is_running/0, alarms/0,
+ is_running/1, environment/0, rotate_logs/0, force_event_refresh/1,
+ start_fhc/0]).
+
+-export([start/2, stop/1, prep_stop/1]).
+-export([start_apps/1, start_apps/2, stop_apps/1]).
+-export([product_info/0,
+ product_name/0,
+ product_version/0,
+ base_product_name/0,
+ base_product_version/0,
+ motd_file/0,
+ motd/0]).
+-export([log_locations/0, config_files/0]). %% for testing and mgmt-agent
+-export([is_booted/1, is_booted/0, is_booting/1, is_booting/0]).
+
+%%---------------------------------------------------------------------------
+%% Boot steps.
+-export([maybe_insert_default_data/0, boot_delegate/0, recover/0]).
+
+%% for tests
+-export([validate_msg_store_io_batch_size_and_credit_disc_bound/2]).
+
+-rabbit_boot_step({pre_boot, [{description, "rabbit boot start"}]}).
+
+-rabbit_boot_step({codec_correctness_check,
+ [{description, "codec correctness check"},
+ {mfa, {rabbit_binary_generator,
+ check_empty_frame_size,
+ []}},
+ {requires, pre_boot},
+ {enables, external_infrastructure}]}).
+
+%% rabbit_alarm currently starts memory and disk space monitors
+-rabbit_boot_step({rabbit_alarm,
+ [{description, "alarm handler"},
+ {mfa, {rabbit_alarm, start, []}},
+ {requires, pre_boot},
+ {enables, external_infrastructure}]}).
+
+-rabbit_boot_step({feature_flags,
+ [{description, "feature flags registry and initial state"},
+ {mfa, {rabbit_feature_flags, init, []}},
+ {requires, pre_boot},
+ {enables, external_infrastructure}]}).
+
+-rabbit_boot_step({database,
+ [{mfa, {rabbit_mnesia, init, []}},
+ {requires, file_handle_cache},
+ {enables, external_infrastructure}]}).
+
+-rabbit_boot_step({database_sync,
+ [{description, "database sync"},
+ {mfa, {rabbit_sup, start_child, [mnesia_sync]}},
+ {requires, database},
+ {enables, external_infrastructure}]}).
+
+-rabbit_boot_step({code_server_cache,
+ [{description, "code_server cache server"},
+ {mfa, {rabbit_sup, start_child, [code_server_cache]}},
+ {requires, rabbit_alarm},
+ {enables, file_handle_cache}]}).
+
+-rabbit_boot_step({file_handle_cache,
+ [{description, "file handle cache server"},
+ {mfa, {rabbit, start_fhc, []}},
+ %% FHC needs memory monitor to be running
+ {requires, code_server_cache},
+ {enables, worker_pool}]}).
+
+-rabbit_boot_step({worker_pool,
+ [{description, "default worker pool"},
+ {mfa, {rabbit_sup, start_supervisor_child,
+ [worker_pool_sup]}},
+ {requires, pre_boot},
+ {enables, external_infrastructure}]}).
+
+-rabbit_boot_step({definition_import_worker_pool,
+ [{description, "dedicated worker pool for definition import"},
+ {mfa, {rabbit_definitions, boot, []}},
+ {requires, external_infrastructure}]}).
+
+-rabbit_boot_step({external_infrastructure,
+ [{description, "external infrastructure ready"}]}).
+
+-rabbit_boot_step({rabbit_registry,
+ [{description, "plugin registry"},
+ {mfa, {rabbit_sup, start_child,
+ [rabbit_registry]}},
+ {requires, external_infrastructure},
+ {enables, kernel_ready}]}).
+
+-rabbit_boot_step({rabbit_core_metrics,
+ [{description, "core metrics storage"},
+ {mfa, {rabbit_sup, start_child,
+ [rabbit_metrics]}},
+ {requires, pre_boot},
+ {enables, external_infrastructure}]}).
+
+-rabbit_boot_step({rabbit_osiris_metrics,
+ [{description, "osiris metrics scraper"},
+ {mfa, {rabbit_sup, start_child,
+ [rabbit_osiris_metrics]}},
+ {requires, pre_boot},
+ {enables, external_infrastructure}]}).
+
+%% -rabbit_boot_step({rabbit_stream_coordinator,
+%% [{description, "stream queues coordinator"},
+%% {mfa, {rabbit_stream_coordinator, start,
+%% []}},
+%% {requires, pre_boot},
+%% {enables, external_infrastructure}]}).
+
+-rabbit_boot_step({rabbit_event,
+ [{description, "statistics event manager"},
+ {mfa, {rabbit_sup, start_restartable_child,
+ [rabbit_event]}},
+ {requires, external_infrastructure},
+ {enables, kernel_ready}]}).
+
+-rabbit_boot_step({kernel_ready,
+ [{description, "kernel ready"},
+ {requires, external_infrastructure}]}).
+
+-rabbit_boot_step({rabbit_memory_monitor,
+ [{description, "memory monitor"},
+ {mfa, {rabbit_sup, start_restartable_child,
+ [rabbit_memory_monitor]}},
+ {requires, rabbit_alarm},
+ {enables, core_initialized}]}).
+
+-rabbit_boot_step({guid_generator,
+ [{description, "guid generator"},
+ {mfa, {rabbit_sup, start_restartable_child,
+ [rabbit_guid]}},
+ {requires, kernel_ready},
+ {enables, core_initialized}]}).
+
+-rabbit_boot_step({delegate_sup,
+ [{description, "cluster delegate"},
+ {mfa, {rabbit, boot_delegate, []}},
+ {requires, kernel_ready},
+ {enables, core_initialized}]}).
+
+-rabbit_boot_step({rabbit_node_monitor,
+ [{description, "node monitor"},
+ {mfa, {rabbit_sup, start_restartable_child,
+ [rabbit_node_monitor]}},
+ {requires, [rabbit_alarm, guid_generator]},
+ {enables, core_initialized}]}).
+
+-rabbit_boot_step({rabbit_epmd_monitor,
+ [{description, "epmd monitor"},
+ {mfa, {rabbit_sup, start_restartable_child,
+ [rabbit_epmd_monitor]}},
+ {requires, kernel_ready},
+ {enables, core_initialized}]}).
+
+-rabbit_boot_step({rabbit_sysmon_minder,
+ [{description, "sysmon_handler supervisor"},
+ {mfa, {rabbit_sup, start_restartable_child,
+ [rabbit_sysmon_minder]}},
+ {requires, kernel_ready},
+ {enables, core_initialized}]}).
+
+-rabbit_boot_step({core_initialized,
+ [{description, "core initialized"},
+ {requires, kernel_ready}]}).
+
+-rabbit_boot_step({upgrade_queues,
+ [{description, "per-vhost message store migration"},
+ {mfa, {rabbit_upgrade,
+ maybe_migrate_queues_to_per_vhost_storage,
+ []}},
+ {requires, [core_initialized]},
+ {enables, recovery}]}).
+
+-rabbit_boot_step({recovery,
+ [{description, "exchange, queue and binding recovery"},
+ {mfa, {rabbit, recover, []}},
+ {requires, [core_initialized]},
+ {enables, routing_ready}]}).
+
+-rabbit_boot_step({empty_db_check,
+ [{description, "empty DB check"},
+ {mfa, {?MODULE, maybe_insert_default_data, []}},
+ {requires, recovery},
+ {enables, routing_ready}]}).
+
+%% Boot steps executed by rabbit_boot_steps:run_boot_steps/1. Each step
+%% declares what it `requires' (must run before it) and what it
+%% `enables' (may only run after it); steps with an `mfa' run that
+%% function, the others are pure synchronization points.
+-rabbit_boot_step({routing_ready,
+                   [{description, "message delivery logic ready"},
+                    {requires, [core_initialized, recovery]}]}).
+
+-rabbit_boot_step({connection_tracking,
+                   [{description, "connection tracking infrastructure"},
+                    {mfa, {rabbit_connection_tracking, boot, []}},
+                    {enables, routing_ready}]}).
+
+-rabbit_boot_step({channel_tracking,
+                   [{description, "channel tracking infrastructure"},
+                    {mfa, {rabbit_channel_tracking, boot, []}},
+                    {enables, routing_ready}]}).
+
+-rabbit_boot_step({background_gc,
+                   [{description, "background garbage collection"},
+                    {mfa, {rabbit_sup, start_restartable_child,
+                           [background_gc]}},
+                    {requires, [core_initialized, recovery]},
+                    {enables, routing_ready}]}).
+
+-rabbit_boot_step({rabbit_core_metrics_gc,
+                   [{description, "background core metrics garbage collection"},
+                    {mfa, {rabbit_sup, start_restartable_child,
+                           [rabbit_core_metrics_gc]}},
+                    {requires, [core_initialized, recovery]},
+                    {enables, routing_ready}]}).
+
+-rabbit_boot_step({rabbit_looking_glass,
+                   [{description, "Looking Glass tracer and profiler"},
+                    {mfa, {rabbit_looking_glass, boot, []}},
+                    {requires, [core_initialized, recovery]},
+                    {enables, routing_ready}]}).
+
+-rabbit_boot_step({pre_flight,
+                   [{description, "ready to communicate with peers and clients"},
+                    {requires, [core_initialized, recovery, routing_ready]}]}).
+
+-rabbit_boot_step({cluster_name,
+                   [{description, "sets cluster name if configured"},
+                    {mfa, {rabbit_nodes, boot, []}},
+                    {requires, pre_flight}
+                   ]}).
+
+-rabbit_boot_step({direct_client,
+                   [{description, "direct client"},
+                    {mfa, {rabbit_direct, boot, []}},
+                    {requires, pre_flight}
+                   ]}).
+
+-rabbit_boot_step({notify_cluster,
+                   [{description, "notifies cluster peers of our presence"},
+                    {mfa, {rabbit_node_monitor, notify_node_up, []}},
+                    {requires, pre_flight}]}).
+
+%% Kept only so older tooling that waits on this step keeps working;
+%% the real listener startup happens at the end of the postlaunch
+%% phase (see do_run_postlaunch_phase/0).
+-rabbit_boot_step({networking,
+                   [{description, "TCP and TLS listeners (backwards compatibility)"},
+                    {mfa, {rabbit_log, debug, ["'networking' boot step skipped and moved to end of startup", []]}},
+                    {requires, notify_cluster}]}).
+
+%%---------------------------------------------------------------------------
+
+-include("rabbit_framing.hrl").
+-include("rabbit.hrl").
+
+%% Applications (in no particular order) that make up a broker node;
+%% used when stopping the node. Plugins are appended at stop time.
+-define(APPS, [os_mon, mnesia, rabbit_common, rabbitmq_prelaunch, ra, sysmon_handler, rabbit, osiris]).
+
+-define(ASYNC_THREADS_WARNING_THRESHOLD, 8).
+
+%% 1 minute
+-define(BOOT_START_TIMEOUT, 1 * 60 * 1000).
+%% 12 hours
+-define(BOOT_FINISH_TIMEOUT, 12 * 60 * 60 * 1000).
+%% 100 ms
+-define(BOOT_STATUS_CHECK_INTERVAL, 100).
+
+%%----------------------------------------------------------------------------
+
+-type restart_type() :: 'permanent' | 'transient' | 'temporary'.
+
+-type param() :: atom().
+-type app_name() :: atom().
+
+%%----------------------------------------------------------------------------
+
+-spec start() -> 'ok'.
+
+%% Starts the node with `temporary' restart type: a startup failure is
+%% reported back to the caller instead of terminating the Erlang VM.
+start() ->
+    %% start() vs. boot(): we want to throw an error in start().
+    start_it(temporary).
+
+-spec boot() -> 'ok'.
+
+%% Starts the node with `transient' restart type: a startup failure
+%% takes the whole Erlang node down.
+boot() ->
+    %% start() vs. boot(): we want the node to exit in boot(). Because
+    %% applications are started with `transient`, any error during their
+    %% startup will abort the node.
+    start_it(transient).
+
+%% Runs the second half of the prelaunch sequence — enabled-plugins
+%% file, feature flags registry, logging and clustering setup — and
+%% finally starts Mnesia. Called from the `rabbit' application start
+%% callback (start/2); the order of the numbered steps below matters.
+run_prelaunch_second_phase() ->
+    %% Finish the prelaunch phase started by the `rabbitmq_prelaunch`
+    %% application.
+    %%
+    %% The first phase was handled by the `rabbitmq_prelaunch`
+    %% application. It was started in one of the following way:
+    %%   - from an Erlang release boot script;
+    %%   - from the rabbit:boot/0 or rabbit:start/0 functions.
+    %%
+    %% The `rabbitmq_prelaunch` application creates the context map from
+    %% the environment and the configuration files early during Erlang
+    %% VM startup. Once it is done, all application environments are
+    %% configured (in particular `mnesia` and `ra`).
+    %%
+    %% This second phase depends on other modules & facilities of
+    %% RabbitMQ core. That's why we need to run it now, from the
+    %% `rabbit` application start function.
+
+    %% We assert Mnesia is stopped before we run the prelaunch
+    %% phases. See `rabbit_prelaunch` for an explanation.
+    %%
+    %% This is the second assertion, just in case Mnesia is started
+    %% between the two prelaunch phases.
+    rabbit_prelaunch:assert_mnesia_is_stopped(),
+
+    %% Get the context created by `rabbitmq_prelaunch` then proceed
+    %% with all steps in this phase.
+    #{initial_pass := IsInitialPass} =
+        Context = rabbit_prelaunch:get_context(),
+
+    case IsInitialPass of
+        true ->
+            rabbit_log_prelaunch:debug(""),
+            rabbit_log_prelaunch:debug(
+              "== Prelaunch phase [2/2] (initial pass) ==");
+        false ->
+            rabbit_log_prelaunch:debug(""),
+            rabbit_log_prelaunch:debug("== Prelaunch phase [2/2] =="),
+            ok
+    end,
+
+    %% 1. Enabled plugins file.
+    ok = rabbit_prelaunch_enabled_plugins_file:setup(Context),
+
+    %% 2. Feature flags registry.
+    ok = rabbit_prelaunch_feature_flags:setup(Context),
+
+    %% 3. Logging.
+    ok = rabbit_prelaunch_logging:setup(Context),
+
+    %% 4. Clustering.
+    ok = rabbit_prelaunch_cluster:setup(Context),
+
+    %% Start Mnesia now that everything is ready.
+    rabbit_log_prelaunch:debug("Starting Mnesia"),
+    ok = mnesia:start(),
+
+    rabbit_log_prelaunch:debug(""),
+    rabbit_log_prelaunch:debug("== Prelaunch DONE =="),
+
+    case IsInitialPass of
+        true -> rabbit_prelaunch:initial_pass_finished();
+        false -> ok
+    end,
+    ok.
+
+%% Common implementation of start/0 and boot/0. Serializes concurrent
+%% boot attempts through the `rabbit_boot' marker process, starts the
+%% `rabbitmq_prelaunch' and `rabbit' application trees, then waits for
+%% the node to become ready (or to stop after a failed boot). On
+%% failure, `temporary' throws while any other start type exits.
+start_it(StartType) ->
+    case spawn_boot_marker() of
+        {ok, Marker} ->
+            T0 = erlang:timestamp(),
+            rabbit_log:info("RabbitMQ is asked to start...", []),
+            try
+                {ok, _} = application:ensure_all_started(rabbitmq_prelaunch,
+                                                         StartType),
+                {ok, _} = application:ensure_all_started(rabbit,
+                                                         StartType),
+                ok = wait_for_ready_or_stopped(),
+
+                T1 = erlang:timestamp(),
+                rabbit_log_prelaunch:debug(
+                  "Time to start RabbitMQ: ~p µs",
+                  [timer:now_diff(T1, T0)]),
+                stop_boot_marker(Marker),
+                ok
+            catch
+                %% Any of the `ok = ...' / `{ok, _} = ...' matches above
+                %% failing lands here with the offending value.
+                error:{badmatch, Error}:_ ->
+                    stop_boot_marker(Marker),
+                    case StartType of
+                        temporary -> throw(Error);
+                        _ -> exit(Error)
+                    end
+            end;
+        {already_booting, Marker} ->
+            stop_boot_marker(Marker),
+            ok
+    end.
+
+%% Blocks until the boot state reaches `ready' and returns `ok'. If
+%% the state is anything else after the wait (the boot failed), waits
+%% for `stopped' instead and returns the recorded stop reason.
+wait_for_ready_or_stopped() ->
+    ok = rabbit_boot_state:wait_for(ready, ?BOOT_FINISH_TIMEOUT),
+    case rabbit_boot_state:get() of
+        ready ->
+            ok;
+        _ ->
+            ok = rabbit_boot_state:wait_for(stopped, ?BOOT_FINISH_TIMEOUT),
+            rabbit_prelaunch:get_stop_reason()
+    end.
+
+%% Compatibility with older RabbitMQ versions:
+%% We register a process doing nothing to indicate that RabbitMQ is
+%% booting. This is checked by `is_booting(Node)` on a remote node.
+%%
+%% Returns {ok, Marker} when we won the registration race, or
+%% {already_booting, Marker} when the `rabbit_boot' name is already
+%% taken (another boot is in progress).
+spawn_boot_marker() ->
+    Marker = spawn_link(fun() -> receive stop -> ok end end),
+    %% register/2 fails with badarg when the name is already in use.
+    %% Use try/catch instead of old-style `catch' so only that specific
+    %% error is treated as "already booting" and stacktraces for
+    %% anything unexpected are preserved.
+    try register(rabbit_boot, Marker) of
+        true -> {ok, Marker}
+    catch
+        error:badarg -> {already_booting, Marker}
+    end.
+
+%% Asks the boot-marker process to terminate; unlinks first so its
+%% exit cannot propagate to us.
+stop_boot_marker(Marker) ->
+    unlink(Marker),
+    Marker ! stop,
+    ok.
+
+-spec stop() -> 'ok'.
+
+%% Gracefully stops the broker applications while keeping the Erlang
+%% VM alive. If a boot is still in flight, waits for it to finish (or
+%% fail) first; a node that already reached `stopped' is a no-op.
+stop() ->
+    case wait_for_ready_or_stopped() of
+        ok ->
+            case rabbit_boot_state:get() of
+                ready ->
+                    Product = product_name(),
+                    rabbit_log:info("~s is asked to stop...", [Product]),
+                    do_stop(),
+                    rabbit_log:info(
+                      "Successfully stopped ~s and its dependencies",
+                      [Product]),
+                    ok;
+                stopped ->
+                    ok
+            end;
+        _ ->
+            ok
+    end.
+
+%% Stops every broker application plus the active plugins in reverse
+%% dependency order, forcing Mnesia to go down last (i.e. after the
+%% `rabbit' application itself).
+do_stop() ->
+    Running = ?APPS ++ rabbit_plugins:active(),
+    Ordered = app_utils:app_dependency_order(Running, true),
+    %% stop_apps/1 also performs unregistration with the peer
+    %% discovery backend as needed.
+    stop_apps([mnesia | Ordered -- [mnesia]]).
+
+-spec stop_and_halt() -> no_return().
+
+%% Stops the broker like stop/0, then halts the Erlang VM. The nested
+%% try/after guarantees init:stop/0 runs even when stopping or the
+%% final logging fails.
+stop_and_halt() ->
+    try
+        stop()
+    catch Type:Reason ->
+        rabbit_log:error(
+          "Error trying to stop ~s: ~p:~p",
+          [product_name(), Type, Reason]),
+        error({Type, Reason})
+    after
+        %% Enclose all the logging in the try block.
+        %% init:stop() will be called regardless of any errors.
+        try
+            AppsLeft = [ A || {A, _, _} <- application:which_applications() ],
+            rabbit_log:info(
+                lists:flatten(["Halting Erlang VM with the following applications:~n",
+                               [" ~p~n" || _ <- AppsLeft]]),
+                AppsLeft),
+            %% Also duplicate this information to stderr, so console where
+            %% foreground broker was running (or systemd journal) will
+            %% contain information about graceful termination.
+            io:format(standard_error, "Gracefully halting Erlang VM~n", [])
+        after
+            init:stop()
+        end
+    end,
+    ok.
+
+-spec start_apps([app_name()]) -> 'ok'.
+
+%% Starts the given (plugin) applications with the default `temporary'
+%% restart type for each.
+start_apps(Apps) ->
+    start_apps(Apps, #{}).
+
+-spec start_apps([app_name()],
+                 #{app_name() => restart_type()}) -> 'ok'.
+
+%% TODO: start_apps/2 and is now specific to plugins. This function
+%% should be moved over `rabbit_plugins`, along with stop_apps/1, once
+%% the latter stops using app_utils as well.
+
+%% Loads the applications first (so new feature flags become
+%% discoverable), refreshes feature flags and decrypts config values,
+%% then runs each application's boot steps and starts it. Throws
+%% {could_not_start, App, Reason} on the first failure.
+start_apps(Apps, RestartTypes) ->
+    false = lists:member(rabbit, Apps), %% Assertion.
+    %% We need to load all applications involved in order to be able to
+    %% find new feature flags.
+    app_utils:load_applications(Apps),
+    ok = rabbit_feature_flags:refresh_feature_flags_after_app_load(Apps),
+    rabbit_prelaunch_conf:decrypt_config(Apps),
+    lists:foreach(
+      fun(App) ->
+              RestartType = maps:get(App, RestartTypes, temporary),
+              ok = rabbit_boot_steps:run_boot_steps([App]),
+              case application:ensure_all_started(App, RestartType) of
+                  {ok, _} -> ok;
+                  {error, Reason} -> throw({could_not_start, App, Reason})
+              end
+      end, Apps).
+
+-spec stop_apps([app_name()]) -> 'ok'.
+
+%% Stops the given applications; shutdown errors are converted into a
+%% thrown {error_during_shutdown, App, Reason}. Boot-step cleanup only
+%% runs for plugin-only stops: when `rabbit' itself is in the list the
+%% entire node is going down anyway.
+stop_apps([]) ->
+    ok;
+stop_apps(Apps) ->
+    rabbit_log:info(
+        lists:flatten(["Stopping ~s applications and their dependencies in the following order:~n",
+                       [" ~p~n" || _ <- Apps]]),
+        [product_name() | lists:reverse(Apps)]),
+    ok = app_utils:stop_applications(
+           Apps, handle_app_error(error_during_shutdown)),
+    case lists:member(rabbit, Apps) of
+        %% plugin deactivation
+        false -> rabbit_boot_steps:run_cleanup_steps(Apps);
+        true -> ok %% it's all going anyway
+    end,
+    ok.
+
+%% Returns a callback for app_utils:stop_applications/2 that turns an
+%% application shutdown failure into a thrown {Term, App, Reason}.
+%% The {bad_return, {'EXIT', ...}} wrapper added by the application
+%% controller is unwrapped to expose the underlying exit reason.
+-spec handle_app_error(_) -> fun((_, _) -> no_return()).
+handle_app_error(Term) ->
+    fun(App, Failure) ->
+            Reason = case Failure of
+                         {bad_return, {_MFA, {'EXIT', ExitReason}}} ->
+                             ExitReason;
+                         Other ->
+                             Other
+                     end,
+            throw({Term, App, Reason})
+    end.
+
+%% True while the local (or given) node is going through its boot
+%% sequence. For a remote node the answer comes over RPC, so the
+%% result may also be {badrpc, Reason}.
+is_booting() -> is_booting(node()).
+
+is_booting(Node) when Node =:= node() ->
+    rabbit_boot_state:get() =:= booting;
+is_booting(Node) ->
+    %% Both the remote boolean and a {badrpc, _} failure are passed
+    %% back to the caller unchanged.
+    rpc:call(Node, rabbit, is_booting, []).
+
+
+-spec await_startup() -> 'ok' | {'error', 'timeout'}.
+
+%% Waits for the local node to finish booting, without printing
+%% progress reports.
+await_startup() ->
+    await_startup(node(), false).
+
+-spec await_startup(node() | non_neg_integer()) -> 'ok' | {'error', 'timeout'}.
+
+%% Waits either for the given node to finish booting, or — when given
+%% an integer — for the local node with that timeout in milliseconds.
+%% (The stray indentation on the second clause head made it look like
+%% a continuation line; the clauses are now formatted conventionally.)
+await_startup(Node) when is_atom(Node) ->
+    await_startup(Node, false);
+await_startup(Timeout) when is_integer(Timeout) ->
+    await_startup(node(), false, Timeout).
+
+-spec await_startup(node(), boolean()) -> 'ok' | {'error', 'timeout'}.
+
+%% Waits for Node to finish booting, optionally printing periodic
+%% progress reports. If the node has not even started booting yet,
+%% first waits for the boot to begin.
+await_startup(Node, PrintProgressReports) ->
+    case is_booting(Node) of
+        true -> wait_for_boot_to_finish(Node, PrintProgressReports);
+        false ->
+            case is_running(Node) of
+                true -> ok;
+                false -> wait_for_boot_to_start(Node),
+                         wait_for_boot_to_finish(Node, PrintProgressReports)
+            end
+    end.
+
+-spec await_startup(node(), boolean(), non_neg_integer()) -> 'ok' | {'error', 'timeout'}.
+
+%% Same as await_startup/2 but with an explicit timeout (milliseconds)
+%% applied to both the boot-start and boot-finish waits.
+await_startup(Node, PrintProgressReports, Timeout) ->
+    case is_booting(Node) of
+        true -> wait_for_boot_to_finish(Node, PrintProgressReports, Timeout);
+        false ->
+            case is_running(Node) of
+                true -> ok;
+                false -> wait_for_boot_to_start(Node, Timeout),
+                         wait_for_boot_to_finish(Node, PrintProgressReports, Timeout)
+            end
+    end.
+
+%% Polls (every ?BOOT_STATUS_CHECK_INTERVAL ms) until Node reports it
+%% has started booting, up to ?BOOT_START_TIMEOUT by default.
+wait_for_boot_to_start(Node) ->
+    wait_for_boot_to_start(Node, ?BOOT_START_TIMEOUT).
+
+wait_for_boot_to_start(Node, infinity) ->
+    %% This assumes that 100K iterations is close enough to "infinity".
+    %% Now that's deep.
+    do_wait_for_boot_to_start(Node, 100000);
+wait_for_boot_to_start(Node, Timeout) ->
+    Iterations = Timeout div ?BOOT_STATUS_CHECK_INTERVAL,
+    do_wait_for_boot_to_start(Node, Iterations).
+
+%% Returns ok once booting is observed, {error, timeout} when the
+%% iteration budget runs out, or the {badrpc, _} error as-is.
+do_wait_for_boot_to_start(_Node, IterationsLeft) when IterationsLeft =< 0 ->
+    {error, timeout};
+do_wait_for_boot_to_start(Node, IterationsLeft) ->
+    case is_booting(Node) of
+        false ->
+            timer:sleep(?BOOT_STATUS_CHECK_INTERVAL),
+            do_wait_for_boot_to_start(Node, IterationsLeft - 1);
+        {badrpc, _} = Err ->
+            Err;
+        true ->
+            ok
+    end.
+
+%% Polls until Node is no longer booting; then checks whether it ended
+%% up running. Default timeout is ?BOOT_FINISH_TIMEOUT.
+wait_for_boot_to_finish(Node, PrintProgressReports) ->
+    wait_for_boot_to_finish(Node, PrintProgressReports, ?BOOT_FINISH_TIMEOUT).
+
+wait_for_boot_to_finish(Node, PrintProgressReports, infinity) ->
+    %% This assumes that 100K iterations is close enough to "infinity".
+    %% Now that's deep.
+    do_wait_for_boot_to_finish(Node, PrintProgressReports, 100000);
+wait_for_boot_to_finish(Node, PrintProgressReports, Timeout) ->
+    Iterations = Timeout div ?BOOT_STATUS_CHECK_INTERVAL,
+    do_wait_for_boot_to_finish(Node, PrintProgressReports, Iterations).
+
+%% Returns ok when the node is running, {error, rabbit_is_not_running}
+%% when boot ended without the node running, {error, timeout} when the
+%% budget runs out, or a {badrpc, _} error as-is.
+do_wait_for_boot_to_finish(_Node, _PrintProgressReports, IterationsLeft) when IterationsLeft =< 0 ->
+    {error, timeout};
+do_wait_for_boot_to_finish(Node, PrintProgressReports, IterationsLeft) ->
+    case is_booting(Node) of
+        false ->
+            %% We don't want badrpc error to be interpreted as false,
+            %% so we don't call rabbit:is_running(Node)
+            case rpc:call(Node, rabbit, is_running, []) of
+                true -> ok;
+                false -> {error, rabbit_is_not_running};
+                {badrpc, _} = Err -> Err
+            end;
+        {badrpc, _} = Err ->
+            Err;
+        true ->
+            maybe_print_boot_progress(PrintProgressReports, IterationsLeft),
+            timer:sleep(?BOOT_STATUS_CHECK_INTERVAL),
+            do_wait_for_boot_to_finish(Node, PrintProgressReports, IterationsLeft - 1)
+    end.
+
+%% Prints a progress line once every 100 poll iterations (about every
+%% 10 seconds given the 100 ms check interval) while waiting for a
+%% boot to finish; a no-op when progress reports are disabled.
+maybe_print_boot_progress(false, _IterationsLeft) ->
+    ok;
+maybe_print_boot_progress(true, IterationsLeft)
+  when IterationsLeft rem 100 =:= 0 ->
+    %% This will be printed on the CLI command end to illustrate some
+    %% progress.
+    io:format("Still booting, will check again in 10 seconds...~n");
+maybe_print_boot_progress(true, _IterationsLeft) ->
+    ok.
+
+-spec status
+        () -> [{pid, integer()} |
+               {running_applications, [{atom(), string(), string()}]} |
+               {os, {atom(), atom()}} |
+               {erlang_version, string()} |
+               {memory, any()}].
+
+%% Builds the proplist behind `rabbitmqctl status': process/OS/version
+%% info, memory, alarms, listeners, resource limits, plugin and file
+%% locations, totals (only when the node is running) and the product
+%% identity. Sections whose collectors may be down are wrapped in exit
+%% handlers so the report degrades gracefully instead of failing.
+status() ->
+    Version = base_product_version(),
+    S1 = [{pid, list_to_integer(os:getpid())},
+          %% The timeout value used is twice that of gen_server:call/2.
+          {running_applications, rabbit_misc:which_applications()},
+          {os, os:type()},
+          {rabbitmq_version, Version},
+          {erlang_version, erlang:system_info(system_version)},
+          {memory, rabbit_vm:memory()},
+          {alarms, alarms()},
+          {is_under_maintenance, rabbit_maintenance:is_being_drained_local_read(node())},
+          {listeners, listeners()},
+          {vm_memory_calculation_strategy, vm_memory_monitor:get_memory_calculation_strategy()}],
+    S2 = rabbit_misc:filter_exit_map(
+           fun ({Key, {M, F, A}}) -> {Key, erlang:apply(M, F, A)} end,
+           [{vm_memory_high_watermark, {vm_memory_monitor,
+                                        get_vm_memory_high_watermark, []}},
+            {vm_memory_limit, {vm_memory_monitor,
+                               get_memory_limit, []}},
+            {disk_free_limit, {rabbit_disk_monitor,
+                               get_disk_free_limit, []}},
+            {disk_free, {rabbit_disk_monitor,
+                         get_disk_free, []}}]),
+    S3 = rabbit_misc:with_exit_handler(
+           fun () -> [] end,
+           fun () -> [{file_descriptors, file_handle_cache:info()}] end),
+    S4 = [{processes, [{limit, erlang:system_info(process_limit)},
+                       {used, erlang:system_info(process_count)}]},
+          {run_queue, erlang:statistics(run_queue)},
+          {uptime, begin
+                       {T,_} = erlang:statistics(wall_clock),
+                       T div 1000
+                   end},
+          {kernel, {net_ticktime, net_kernel:get_net_ticktime()}}],
+    S5 = [{active_plugins, rabbit_plugins:active()},
+          {enabled_plugin_file, rabbit_plugins:enabled_plugins_file()}],
+    S6 = [{config_files, config_files()},
+          {log_files, log_locations()},
+          {data_directory, rabbit_mnesia:dir()},
+          {raft_data_directory, ra_env:data_dir()}],
+    Totals = case is_running() of
+                 true ->
+                     [{virtual_host_count, rabbit_vhost:count()},
+                      {connection_count,
+                       length(rabbit_networking:connections_local())},
+                      {queue_count, total_queue_count()}];
+                 false ->
+                     []
+             end,
+    S7 = [{totals, Totals}],
+    S8 = lists:filter(
+           fun
+               ({product_base_name, _}) -> true;
+               ({product_base_version, _}) -> true;
+               ({product_name, _}) -> true;
+               ({product_version, _}) -> true;
+               (_) -> false
+           end,
+           maps:to_list(product_info())),
+    S1 ++ S2 ++ S3 ++ S4 ++ S5 ++ S6 ++ S7 ++ S8.
+
+%% Resource-limit alarms raised by this node only; alarms raised on
+%% other cluster members are filtered out. Returns [] when the alarm
+%% server is unavailable.
+alarms() ->
+    Alarms = rabbit_misc:with_exit_handler(rabbit_misc:const([]),
+                                           fun rabbit_alarm:get_alarms/0),
+    N = node(),
+    %% [{{resource_limit,memory,rabbit@mercurio},[]}]
+    [{resource_limit, Limit, Node} || {{resource_limit, Limit, Node}, _} <- Alarms, Node =:= N].
+
+%% Active listeners on this node only. An `{aborted, _}' exit (e.g.
+%% the listener table being unavailable) yields an empty list.
+listeners() ->
+    Listeners = try
+                    rabbit_networking:active_listeners()
+                catch
+                    exit:{aborted, _} -> []
+                end,
+    [L || L = #listener{node = Node} <- Listeners, Node =:= node()].
+
+%% Total number of queues summed across every virtual host.
+total_queue_count() ->
+    lists:sum([rabbit_amqqueue:count(VHost)
+               || VHost <- rabbit_vhost:list_names()]).
+
+-spec is_running() -> boolean().
+
+%% True when the local node has fully booted (boot state `ready').
+is_running() -> is_running(node()).
+
+-spec is_running(node()) -> boolean().
+
+is_running(Node) when Node =:= node() ->
+    rabbit_boot_state:get() =:= ready;
+is_running(Node) ->
+    %% RPC failures (and any non-true result) count as "not running".
+    rpc:call(Node, rabbit, is_running, []) =:= true.
+
+%% True once the node has completed booting: it is not currently
+%% booting and reports as running. A {badrpc, _} from is_booting/1
+%% short-circuits to false, exactly as before.
+is_booted() -> is_booted(node()).
+
+is_booted(Node) ->
+    is_booting(Node) =:= false andalso is_running(Node).
+
+-spec environment() -> [{param(), term()}].
+
+%% Application environment of every running application, sorted by
+%% application name.
+environment() ->
+    %% The timeout value is twice that of gen_server:call/2.
+    [{A, environment(A)} ||
+        {A, _, _} <- lists:keysort(1, application:which_applications(10000))].
+
+%% Sorted environment of a single application. `default_pass' is
+%% excluded — presumably to avoid exposing the default credential in
+%% reports — along with the bookkeeping `included_applications' key.
+environment(App) ->
+    Ignore = [default_pass, included_applications],
+    lists:keysort(1, [P || P = {K, _} <- application:get_all_env(App),
+                           not lists:member(K, Ignore)]).
+
+-spec rotate_logs() -> rabbit_types:ok_or_error(any()).
+
+%% Forces a log-file rotation on every Lager sink that has file
+%% backends, logging a marker line before and after the rotation.
+rotate_logs() ->
+    rabbit_lager:fold_sinks(
+      fun
+          (_, [], Acc) ->
+              Acc;
+          (SinkName, FileNames, Acc) ->
+              lager:log(SinkName, info, self(),
+                        "Log file rotation forced", []),
+              %% FIXME: We use an internal message, understood by
+              %% lager_file_backend. We should use a proper API, when
+              %% it's added to Lager.
+              %%
+              %% FIXME: This call is effectively asynchronous: at the
+              %% end of this function, we can't guarantee the rotation
+              %% is completed.
+              [ok = gen_event:call(SinkName,
+                                   {lager_file_backend, FileName},
+                                   rotate,
+                                   infinity) || FileName <- FileNames],
+              lager:log(SinkName, info, self(),
+                        "Log file re-opened after forced rotation", []),
+              Acc
+      end, ok).
+
+%%--------------------------------------------------------------------
+
+-spec start('normal',[]) ->
+          {'error',
+           {'erlang_version_too_old',
+            {'found',string(),string()},
+            {'required',string(),string()}}} |
+          {'ok',pid()}.
+
+%% `rabbit' application start callback. Runs the second prelaunch
+%% phase, logs the product banner, starts the root supervisor,
+%% registers the `rabbit' process name and runs the core boot steps
+%% before kicking off the asynchronous postlaunch phase. On failure,
+%% Mnesia is stopped, the error is logged and recorded as the stop
+%% reason, and the boot state is set to `stopped'.
+start(normal, []) ->
+    %% Reset boot state and clear the stop reason again (it was already
+    %% made in rabbitmq_prelaunch).
+    %%
+    %% This is important if the previous startup attempt failed after
+    %% rabbitmq_prelaunch was started and the application is still
+    %% running.
+    rabbit_boot_state:set(booting),
+    rabbit_prelaunch:clear_stop_reason(),
+
+    try
+        run_prelaunch_second_phase(),
+
+        ProductInfo = product_info(),
+        case ProductInfo of
+            #{product_overridden := true,
+              product_base_name := BaseName,
+              product_base_version := BaseVersion} ->
+                rabbit_log:info("~n Starting ~s ~s on Erlang ~s~n Based on ~s ~s~n ~s~n ~s~n",
+                                [product_name(), product_version(), rabbit_misc:otp_release(),
+                                 BaseName, BaseVersion,
+                                 ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]);
+            _ ->
+                rabbit_log:info("~n Starting ~s ~s on Erlang ~s~n ~s~n ~s~n",
+                                [product_name(), product_version(), rabbit_misc:otp_release(),
+                                 ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE])
+        end,
+        log_motd(),
+        {ok, SupPid} = rabbit_sup:start_link(),
+
+        %% Compatibility with older RabbitMQ versions + required by
+        %% rabbit_node_monitor:notify_node_up/0:
+        %%
+        %% We register the app process under the name `rabbit`. This is
+        %% checked by `is_running(Node)` on a remote node. The process
+        %% is also monitored by rabbit_node_monitor.
+        %%
+        %% The process name must be registered *before* running the boot
+        %% steps: that's when rabbit_node_monitor will set the process
+        %% monitor up.
+        %%
+        %% Note that plugins were not taken care of at this point
+        %% either.
+        rabbit_log_prelaunch:debug(
+          "Register `rabbit` process (~p) for rabbit_node_monitor",
+          [self()]),
+        true = register(rabbit, self()),
+
+        print_banner(),
+        log_banner(),
+        warn_if_kernel_config_dubious(),
+        warn_if_disc_io_options_dubious(),
+        %% We run `rabbit` boot steps only for now. Plugins boot steps
+        %% will be executed as part of the postlaunch phase after they
+        %% are started.
+        rabbit_boot_steps:run_boot_steps([rabbit]),
+        run_postlaunch_phase(),
+        {ok, SupPid}
+    catch
+        throw:{error, _} = Error ->
+            mnesia:stop(),
+            rabbit_prelaunch_errors:log_error(Error),
+            rabbit_prelaunch:set_stop_reason(Error),
+            rabbit_boot_state:set(stopped),
+            Error;
+        Class:Exception:Stacktrace ->
+            mnesia:stop(),
+            rabbit_prelaunch_errors:log_exception(
+              Class, Exception, Stacktrace),
+            Error = {error, Exception},
+            rabbit_prelaunch:set_stop_reason(Error),
+            rabbit_boot_state:set(stopped),
+            Error
+    end.
+
+%% Runs the postlaunch phase (plugins, definition import, network
+%% listeners) in a separate process so the application start callback
+%% can return without waiting for it.
+run_postlaunch_phase() ->
+    spawn(fun do_run_postlaunch_phase/0).
+
+%% The postlaunch phase proper: sets up and starts all enabled plugins
+%% (one by one, in dependency order), resets maintenance state, loads
+%% configured definitions, starts client listeners and finally flips
+%% the boot state to `ready'. Any failure records the stop reason and
+%% stops the node via do_stop/0.
+do_run_postlaunch_phase() ->
+    %% Once RabbitMQ itself is started, we need to run a few more steps,
+    %% in particular start plugins.
+    rabbit_log_prelaunch:debug(""),
+    rabbit_log_prelaunch:debug("== Postlaunch phase =="),
+
+    try
+        rabbit_log_prelaunch:debug(""),
+        rabbit_log_prelaunch:debug("== Plugins =="),
+
+        rabbit_log_prelaunch:debug("Setting plugins up"),
+        %% `Plugins` contains all the enabled plugins, plus their
+        %% dependencies. The order is important: dependencies appear
+        %% before plugin which depend on them.
+        Plugins = rabbit_plugins:setup(),
+        rabbit_log_prelaunch:debug(
+          "Starting the following plugins: ~p", [Plugins]),
+        %% We can load all plugins and refresh their feature flags at
+        %% once, because it does not involve running code from the
+        %% plugins.
+        app_utils:load_applications(Plugins),
+        ok = rabbit_feature_flags:refresh_feature_flags_after_app_load(
+               Plugins),
+        %% However, we want to run their boot steps and actually start
+        %% them one by one, to ensure a dependency is fully started
+        %% before a plugin which depends on it gets a chance to start.
+        lists:foreach(
+          fun(Plugin) ->
+                  ok = rabbit_boot_steps:run_boot_steps([Plugin]),
+                  case application:ensure_all_started(Plugin) of
+                      {ok, _} -> ok;
+                      Error -> throw(Error)
+                  end
+          end, Plugins),
+
+        %% Successful boot resets node maintenance state.
+        rabbit_log_prelaunch:info("Resetting node maintenance status"),
+        _ = rabbit_maintenance:unmark_as_being_drained(),
+
+        %% Export definitions after all plugins have been enabled,
+        %% see rabbitmq/rabbitmq-server#2384
+        case rabbit_definitions:maybe_load_definitions() of
+            ok -> ok;
+            DefLoadError -> throw(DefLoadError)
+        end,
+
+        %% Start listeners after all plugins have been enabled,
+        %% see rabbitmq/rabbitmq-server#2405.
+        rabbit_log_prelaunch:info(
+          "Ready to start client connection listeners"),
+        ok = rabbit_networking:boot(),
+
+        %% The node is ready: mark it as such and log it.
+        %% NOTE: PLEASE DO NOT ADD CRITICAL NODE STARTUP CODE AFTER THIS.
+        ok = rabbit_lager:broker_is_started(),
+        ok = log_broker_started(
+               rabbit_plugins:strictly_plugins(rabbit_plugins:active())),
+
+        rabbit_log_prelaunch:debug("Marking ~s as running", [product_name()]),
+        rabbit_boot_state:set(ready)
+    catch
+        throw:{error, _} = Error ->
+            rabbit_prelaunch_errors:log_error(Error),
+            rabbit_prelaunch:set_stop_reason(Error),
+            do_stop();
+        Class:Exception:Stacktrace ->
+            rabbit_prelaunch_errors:log_exception(
+              Class, Exception, Stacktrace),
+            Error = {error, Exception},
+            rabbit_prelaunch:set_stop_reason(Error),
+            do_stop()
+    end.
+
+%% `rabbit' application prep_stop callback: flips the boot state to
+%% `stopping' and unregisters from the peer discovery backend before
+%% the supervision tree is torn down.
+prep_stop(State) ->
+    rabbit_boot_state:set(stopping),
+    rabbit_peer_discovery:maybe_unregister(),
+    State.
+
+-spec stop(_) -> 'ok'.
+
+%% `rabbit' application stop callback: stops the alarm handler, clears
+%% RAM-only tables on unclustered nodes, records the stop reason
+%% (`normal' for a clean `[]' state) and marks the node `stopped'.
+stop(State) ->
+    ok = rabbit_alarm:stop(),
+    ok = case rabbit_mnesia:is_clustered() of
+             true -> ok;
+             false -> rabbit_table:clear_ram_only_tables()
+         end,
+    case State of
+        [] -> rabbit_prelaunch:set_stop_reason(normal);
+        _ -> rabbit_prelaunch:set_stop_reason(State)
+    end,
+    rabbit_boot_state:set(stopped),
+    ok.
+
+%%---------------------------------------------------------------------------
+%% boot step functions
+
+-spec boot_delegate() -> 'ok'.
+
+%% Boot step: starts the delegate supervisor with the configured
+%% number of delegate processes (`delegate_count' app env).
+boot_delegate() ->
+    {ok, Count} = application:get_env(rabbit, delegate_count),
+    rabbit_sup:start_supervisor_child(delegate_sup, [Count]).
+
+-spec recover() -> 'ok'.
+
+%% Boot step: recovers policies and virtual hosts, then makes sure the
+%% log exchange backend has its exchange initialized.
+recover() ->
+    ok = rabbit_policy:recover(),
+    ok = rabbit_vhost:recover(),
+    ok = lager_exchange_backend:maybe_init_exchange().
+
+-spec maybe_insert_default_data() -> 'ok'.
+
+%% Seeds the default virtual host and user, but only on a fresh node
+%% (tables report needing default data) AND when no definitions are
+%% configured to be imported — imported definitions take precedence.
+maybe_insert_default_data() ->
+    NoDefsToImport = not rabbit_definitions:has_configured_definitions_to_load(),
+    case rabbit_table:needs_default_data() andalso NoDefsToImport of
+        true ->
+            rabbit_log:info("Will seed default virtual host and user..."),
+            insert_default_data();
+        false ->
+            rabbit_log:info("Will not seed default virtual host and user: have definitions to load..."),
+            ok
+    end.
+
+%% Creates the default vhost and user (names, password, tags and
+%% permissions all taken from the `rabbit' application environment),
+%% attributing the changes to the internal user.
+insert_default_data() ->
+    {ok, DefaultUser} = application:get_env(default_user),
+    {ok, DefaultPass} = application:get_env(default_pass),
+    {ok, DefaultTags} = application:get_env(default_user_tags),
+    {ok, DefaultVHost} = application:get_env(default_vhost),
+    {ok, [DefaultConfigurePerm, DefaultWritePerm, DefaultReadPerm]} =
+        application:get_env(default_permissions),
+
+    DefaultUserBin = rabbit_data_coercion:to_binary(DefaultUser),
+    DefaultPassBin = rabbit_data_coercion:to_binary(DefaultPass),
+    DefaultVHostBin = rabbit_data_coercion:to_binary(DefaultVHost),
+    DefaultConfigurePermBin = rabbit_data_coercion:to_binary(DefaultConfigurePerm),
+    DefaultWritePermBin = rabbit_data_coercion:to_binary(DefaultWritePerm),
+    DefaultReadPermBin = rabbit_data_coercion:to_binary(DefaultReadPerm),
+
+    ok = rabbit_vhost:add(DefaultVHostBin, <<"Default virtual host">>, [], ?INTERNAL_USER),
+    ok = lager_exchange_backend:maybe_init_exchange(),
+    ok = rabbit_auth_backend_internal:add_user(
+        DefaultUserBin,
+        DefaultPassBin,
+        ?INTERNAL_USER
+    ),
+    ok = rabbit_auth_backend_internal:set_tags(DefaultUserBin, DefaultTags,
+                                               ?INTERNAL_USER),
+    ok = rabbit_auth_backend_internal:set_permissions(DefaultUserBin,
+                                                      DefaultVHostBin,
+                                                      DefaultConfigurePermBin,
+                                                      DefaultWritePermBin,
+                                                      DefaultReadPermBin,
+                                                      ?INTERNAL_USER),
+    ok.
+
+%%---------------------------------------------------------------------------
+%% logging
+
+-spec log_locations() -> [rabbit_lager:log_location()].
+%% Paths of all configured log outputs (delegates to rabbit_lager).
+log_locations() ->
+    rabbit_lager:log_locations().
+
+-spec config_locations() -> [rabbit_config:config_location()].
+%% Paths of the configuration files in use (delegates to rabbit_config).
+config_locations() ->
+    rabbit_config:config_files().
+
+-spec force_event_refresh(reference()) -> 'ok'.
+
+% Note: https://www.pivotaltracker.com/story/show/166962656
+% This event is necessary for the stats timer to be initialized with
+% the correct values once the management agent has started
+force_event_refresh(Ref) ->
+    % direct connections, e.g. MQTT, STOMP
+    ok = rabbit_direct:force_event_refresh(Ref),
+    % AMQP connections
+    ok = rabbit_networking:force_connection_event_refresh(Ref),
+    % "external" connections, which are not handled by the "AMQP core",
+    % e.g. connections to the stream plugin
+    ok = rabbit_networking:force_non_amqp_connection_event_refresh(Ref),
+    ok = rabbit_channel:force_event_refresh(Ref),
+    ok = rabbit_amqqueue:force_event_refresh(Ref).
+
+%%---------------------------------------------------------------------------
+%% misc
+
+%% Logs the "startup complete" message with the list of started
+%% plugins, and echoes a short completion line to stdout for whoever
+%% is watching the console.
+log_broker_started(Plugins) ->
+    PluginList = iolist_to_binary([rabbit_misc:format(" * ~s~n", [P])
+                                   || P <- Plugins]),
+    Message = string:strip(rabbit_misc:format(
+                             "Server startup complete; ~b plugins started.~n~s",
+                             [length(Plugins), PluginList]), right, $\n),
+    rabbit_log:info(Message),
+    io:format(" completed with ~p plugins.~n", [length(Plugins)]).
+
+%% Plain-text startup logo; the ~s placeholders receive the product
+%% name/version and the copyright/information messages.
+-define(RABBIT_TEXT_LOGO,
+        "~n ## ## ~s ~s"
+        "~n ## ##"
+        "~n ########## ~s"
+        "~n ###### ##"
+        "~n ########## ~s").
+%% ANSI SGR escape sequences: 38;5/48;5 select a foreground/background
+%% from the 256-color palette (color 202), 38;2/48;2 select a 24-bit
+%% truecolor foreground/background (255;102;0), and "\033[0m" resets.
+-define(FG8_START, "\033[38;5;202m").
+-define(BG8_START, "\033[48;5;202m").
+-define(FG32_START, "\033[38;2;255;102;0m").
+-define(BG32_START, "\033[48;2;255;102;0m").
+-define(C_END, "\033[0m").
+%% Colored variants of the logo built from blocks of colored
+%% background; used only in interactive, color-capable shells (see
+%% print_banner/0).
+-define(RABBIT_8BITCOLOR_LOGO,
+        "~n " ?BG8_START " " ?C_END " " ?BG8_START " " ?C_END " \033[1m" ?FG8_START "~s" ?C_END " ~s"
+        "~n " ?BG8_START " " ?C_END " " ?BG8_START " " ?C_END
+        "~n " ?BG8_START " " ?C_END " ~s"
+        "~n " ?BG8_START " " ?C_END " " ?BG8_START " " ?C_END
+        "~n " ?BG8_START " " ?C_END " ~s").
+-define(RABBIT_32BITCOLOR_LOGO,
+        "~n " ?BG32_START " " ?C_END " " ?BG32_START " " ?C_END " \033[1m" ?FG32_START "~s" ?C_END " ~s"
+        "~n " ?BG32_START " " ?C_END " " ?BG32_START " " ?C_END
+        "~n " ?BG32_START " " ?C_END " ~s"
+        "~n " ?BG32_START " " ?C_END " " ?BG32_START " " ?C_END
+        "~n " ?BG32_START " " ?C_END " ~s").
+
+%% Prints the startup banner (logo, optional MOTD, doc links, log and
+%% config file locations) to stdout. The logo variant depends on
+%% whether we run in an interactive, color-capable shell.
+print_banner() ->
+    Product = product_name(),
+    Version = product_version(),
+    %% Builds one "~n ~ts" per extra list element so each location
+    %% gets its own padded line; an empty list renders as "(none)".
+    LineListFormatter = fun (Placeholder, [_ | Tail] = LL) ->
+                                LF = lists:flatten([Placeholder || _ <- lists:seq(1, length(Tail))]),
+                                {LF, LL};
+                            (_, []) ->
+                                {"", ["(none)"]}
+                        end,
+    Logo = case rabbit_prelaunch:get_context() of
+               %% We use the colored logo only when running the
+               %% interactive shell and when colors are supported.
+               %%
+               %% Basically it means it will be used on Unix when
+               %% running "make run-broker" and that's about it.
+               #{os_type := {unix, darwin},
+                 interactive_shell := true,
+                 output_supports_colors := true} -> ?RABBIT_8BITCOLOR_LOGO;
+               #{interactive_shell := true,
+                 output_supports_colors := true} -> ?RABBIT_32BITCOLOR_LOGO;
+               _ -> ?RABBIT_TEXT_LOGO
+           end,
+    %% padded list lines
+    {LogFmt, LogLocations} = LineListFormatter("~n ~ts", log_locations()),
+    {CfgFmt, CfgLocations} = LineListFormatter("~n ~ts", config_locations()),
+    {MOTDFormat, MOTDArgs} = case motd() of
+                                 undefined ->
+                                     {"", []};
+                                 MOTD ->
+                                     Lines = string:split(MOTD, "\n", all),
+                                     Padded = [case Line of
+                                                   <<>> -> "\n";
+                                                   _ -> [" ", Line, "\n"]
+                                               end
+                                               || Line <- Lines],
+                                     {"~n~ts", [Padded]}
+                             end,
+    io:format(Logo ++
+              "~n" ++
+              MOTDFormat ++
+              "~n Doc guides: https://rabbitmq.com/documentation.html"
+              "~n Support: https://rabbitmq.com/contact.html"
+              "~n Tutorials: https://rabbitmq.com/getstarted.html"
+              "~n Monitoring: https://rabbitmq.com/monitoring.html"
+              "~n"
+              "~n Logs: ~ts" ++ LogFmt ++ "~n"
+              "~n Config file(s): ~ts" ++ CfgFmt ++ "~n"
+              "~n Starting broker...",
+              [Product, Version, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE] ++
+              MOTDArgs ++
+              LogLocations ++
+              CfgLocations).
+
+%% Writes the message of the day (if any) to the log, indenting each
+%% non-empty line and trimming trailing newlines.
+log_motd() ->
+    case motd() of
+        undefined ->
+            ok;
+        MOTD ->
+            Lines = string:split(MOTD, "\n", all),
+            Padded = [case Line of
+                          <<>> -> "\n";
+                          _ -> [" ", Line, "\n"]
+                      end
+                      || Line <- Lines],
+            rabbit_log:info("~n~ts", [string:trim(Padded, trailing, [$\r, $\n])])
+    end.
+
+%% Logs a settings table (node, home dir, config files, cookie hash,
+%% logs, database dir) with the keys padded to a common width. Extra
+%% log and config files get continuation rows with an empty key.
+log_banner() ->
+    {FirstLog, OtherLogs} = case log_locations() of
+                                [Head | Tail] ->
+                                    {Head, [{"", F} || F <- Tail]};
+                                [] ->
+                                    {"(none)", []}
+                            end,
+    Settings = [{"node", node()},
+                {"home dir", home_dir()},
+                {"config file(s)", config_files()},
+                {"cookie hash", rabbit_nodes:cookie_hash()},
+                {"log(s)", FirstLog}] ++
+               OtherLogs ++
+               [{"database dir", rabbit_mnesia:dir()}],
+    %% Width of the longest key, plus one, to align the values.
+    DescrLen = 1 + lists:max([length(K) || {K, _V} <- Settings]),
+    Format = fun (K, V) ->
+                     rabbit_misc:format(
+                       " ~-" ++ integer_to_list(DescrLen) ++ "s: ~ts~n", [K, V])
+             end,
+    Banner = string:strip(lists:flatten(
+                            [case S of
+                                 {"config file(s)" = K, []} ->
+                                     Format(K, "(none)");
+                                 {"config file(s)" = K, [V0 | Vs]} ->
+                                     [Format(K, V0) | [Format("", V) || V <- Vs]];
+                                 {K, V} ->
+                                     Format(K, V)
+                             end || S <- Settings]), right, $\n),
+    rabbit_log:info("~n~ts", [Banner]).
+
+%% Logs warnings about suboptimal Erlang VM / kernel configuration:
+%% kernel poll disabled (non-Windows only), too few async I/O threads,
+%% and Nagle's algorithm left enabled on sockets.
+warn_if_kernel_config_dubious() ->
+    case os:type() of
+        {win32, _} ->
+            ok;
+        _ ->
+            case erlang:system_info(kernel_poll) of
+                true -> ok;
+                false -> rabbit_log:warning(
+                           "Kernel poll (epoll, kqueue, etc) is disabled. Throughput "
+                           "and CPU utilization may worsen.~n")
+            end
+    end,
+    AsyncThreads = erlang:system_info(thread_pool_size),
+    case AsyncThreads < ?ASYNC_THREADS_WARNING_THRESHOLD of
+        true -> rabbit_log:warning(
+                  "Erlang VM is running with ~b I/O threads, "
+                  "file I/O performance may worsen~n", [AsyncThreads]);
+        false -> ok
+    end,
+    IDCOpts = case application:get_env(kernel, inet_default_connect_options) of
+                  undefined -> [];
+                  {ok, Val} -> Val
+              end,
+    case proplists:get_value(nodelay, IDCOpts, false) of
+        false -> rabbit_log:warning("Nagle's algorithm is enabled for sockets, "
+                                    "network I/O latency will be higher~n");
+        true -> ok
+    end.
+
+%% Validates the msg-store disc I/O settings and logs a warning with
+%% the thrown {Reason, Vars} when they look inconsistent.
+warn_if_disc_io_options_dubious() ->
+    %% if these values are not set, it doesn't matter since
+    %% rabbit_variable_queue will pick up the values defined in the
+    %% IO_BATCH_SIZE and CREDIT_DISC_BOUND constants.
+    CreditDiscBound = rabbit_misc:get_env(rabbit, msg_store_credit_disc_bound,
+                                          undefined),
+    IoBatchSize = rabbit_misc:get_env(rabbit, msg_store_io_batch_size,
+                                      undefined),
+    case catch validate_msg_store_io_batch_size_and_credit_disc_bound(
+                 CreditDiscBound, IoBatchSize) of
+        ok -> ok;
+        {error, {Reason, Vars}} ->
+            rabbit_log:warning(Reason, Vars)
+    end.
+
+%% Validates msg_store_io_batch_size and msg_store_credit_disc_bound
+%% individually and against each other. Returns `ok' when consistent;
+%% otherwise throws {error, {FormatString, Args}} which the caller
+%% (warn_if_disc_io_options_dubious/0) logs as a warning. `undefined'
+%% simply means the defaults from rabbit_variable_queue apply.
+validate_msg_store_io_batch_size_and_credit_disc_bound(CreditDiscBound,
+                                                       IoBatchSize) ->
+    case IoBatchSize of
+        undefined ->
+            ok;
+        IoBatchSize when is_integer(IoBatchSize) ->
+            if IoBatchSize < ?IO_BATCH_SIZE ->
+                    throw({error,
+                           {"io_batch_size of ~b lower than recommended value ~b, "
+                            "paging performance may worsen~n",
+                            [IoBatchSize, ?IO_BATCH_SIZE]}});
+               true ->
+                    ok
+            end;
+        IoBatchSize ->
+            %% This clause only matches NON-integer values, so the
+            %% placeholder must be ~p: the previous ~b would make the
+            %% warning itself crash when formatted.
+            throw({error,
+                   {"io_batch_size should be an integer, but ~p given",
+                    [IoBatchSize]}})
+    end,
+
+    %% CreditDiscBound = {InitialCredit, MoreCreditAfter}
+    {RIC, RMCA} = ?CREDIT_DISC_BOUND,
+    case CreditDiscBound of
+        undefined ->
+            ok;
+        {IC, MCA} when is_integer(IC), is_integer(MCA) ->
+            if IC < RIC; MCA < RMCA ->
+                    %% Note the trailing space before the literal
+                    %% concatenation: the old message read
+                    %% "lower thanrecommended value".
+                    throw({error,
+                           {"msg_store_credit_disc_bound {~b, ~b} lower than "
+                            "recommended value {~b, ~b},"
+                            " paging performance may worsen~n",
+                            [IC, MCA, RIC, RMCA]}});
+               true ->
+                    ok
+            end;
+        {IC, MCA} ->
+            throw({error,
+                   {"both msg_store_credit_disc_bound values should be integers, but ~p given",
+                    [{IC, MCA}]}});
+        CreditDiscBound ->
+            throw({error,
+                   {"invalid msg_store_credit_disc_bound value given: ~p",
+                    [CreditDiscBound]}})
+    end,
+
+    %% Finally, the two settings must be consistent with each other:
+    %% the batch size has to be at least the initial credit.
+    case {CreditDiscBound, IoBatchSize} of
+        {undefined, undefined} ->
+            ok;
+        {_CDB, undefined} ->
+            ok;
+        {undefined, _IBS} ->
+            ok;
+        {{InitialCredit, _MCA}, IoBatchSize} ->
+            if IoBatchSize < InitialCredit ->
+                    throw(
+                      {error,
+                       {"msg_store_io_batch_size ~b should be bigger than the initial "
+                        "credit value from msg_store_credit_disc_bound ~b,"
+                        " paging performance may worsen~n",
+                        [IoBatchSize, InitialCredit]}});
+               true ->
+                    ok
+            end
+    end.
+
+-spec product_name() -> string().
+
+%% Returns the overridden product name when one is present in
+%% product_info(), otherwise the base name (the `rabbit` application
+%% description). Relies on map patterns matching a subset of keys.
+product_name() ->
+    case product_info() of
+        #{product_name := ProductName} -> ProductName;
+        #{product_base_name := BaseName} -> BaseName
+    end.
+
+-spec product_version() -> string().
+
+%% Same precedence as product_name/0, for the version string.
+product_version() ->
+    case product_info() of
+        #{product_version := ProductVersion} -> ProductVersion;
+        #{product_base_version := BaseVersion} -> BaseVersion
+    end.
+
+-spec product_info() -> #{product_base_name := string(),
+                          product_base_version := string(),
+                          product_overridden := boolean(),
+                          product_name => string(),
+                          product_version => string(),
+                          otp_release := string()}.
+
+%% Builds (and caches in a persistent_term) a map describing the product
+%% identity. Name/version overrides come first from the prelaunch context
+%% (environment), then from the `rabbit` application env; when neither is
+%% set, only the base keys are present. `product_overridden => true` is
+%% added whenever any override applies.
+product_info() ->
+    PTKey = {?MODULE, product},
+    try
+        %% The value is cached the first time to avoid calling the
+        %% application master many times just for that.
+        persistent_term:get(PTKey)
+    catch
+        error:badarg ->
+            %% Cache miss (persistent_term:get/1 raises badarg for an
+            %% unknown key): compute the map once and store it.
+            BaseName = base_product_name(),
+            BaseVersion = base_product_version(),
+            Info0 = #{product_base_name => BaseName,
+                      product_base_version => BaseVersion,
+                      otp_release => rabbit_misc:otp_release()},
+
+            {NameFromEnv, VersionFromEnv} =
+                case rabbit_prelaunch:get_context() of
+                    #{product_name := NFE,
+                      product_version := VFE} -> {NFE, VFE};
+                    _ -> {undefined, undefined}
+                end,
+
+            %% Name: environment beats application env; empty/invalid
+            %% app-env values fall back to the base name (see
+            %% string_from_app_env/2).
+            Info1 = case NameFromEnv of
+                        undefined ->
+                            NameFromApp = string_from_app_env(
+                                            product_name,
+                                            undefined),
+                            case NameFromApp of
+                                undefined ->
+                                    Info0;
+                                _ ->
+                                    Info0#{product_name => NameFromApp,
+                                           product_overridden => true}
+                            end;
+                        _ ->
+                            Info0#{product_name => NameFromEnv,
+                                   product_overridden => true}
+                    end,
+
+            %% Version: same precedence as the name.
+            Info2 = case VersionFromEnv of
+                        undefined ->
+                            VersionFromApp = string_from_app_env(
+                                               product_version,
+                                               undefined),
+                            case VersionFromApp of
+                                undefined ->
+                                    Info1;
+                                _ ->
+                                    Info1#{product_version => VersionFromApp,
+                                           product_overridden => true}
+                            end;
+                        _ ->
+                            Info1#{product_version => VersionFromEnv,
+                                   product_overridden => true}
+                    end,
+            persistent_term:put(PTKey, Info2),
+            Info2
+    end.
+
+%% Reads Key from the `rabbit` application environment and returns it as
+%% a flat string. Returns Default when the key is unset, when the value
+%% is not a (possibly deep) character list, or when it flattens to the
+%% empty string.
+string_from_app_env(Key, Default) ->
+    case application:get_env(rabbit, Key) of
+        {ok, Val} ->
+            case io_lib:deep_char_list(Val) of
+                true ->
+                    case lists:flatten(Val) of
+                        "" -> Default;
+                        String -> String
+                    end;
+                false ->
+                    Default
+            end;
+        undefined ->
+            Default
+    end.
+
+%% Base (non-overridden) product name: the `rabbit` app's description.
+base_product_name() ->
+    %% This function assumes the `rabbit` application was loaded in
+    %% product_info().
+    {ok, Product} = application:get_key(rabbit, description),
+    Product.
+
+%% Base (non-overridden) product version: the `rabbit` app's version.
+base_product_version() ->
+    %% This function assumes the `rabbit` application was loaded in
+    %% product_info().
+    rabbit_misc:version().
+
+%% Resolves the path of the "message of the day" file, or 'undefined'.
+motd_file() ->
+    %% Precedence is:
+    %% 1. The environment variable;
+    %% 2. The `motd_file` configuration parameter;
+    %% 3. The default value.
+    Context = rabbit_prelaunch:get_context(),
+    case Context of
+        #{motd_file := File,
+          var_origins := #{motd_file := environment}}
+          when File =/= undefined ->
+            File;
+        _ ->
+            %% Not set via the environment: use the context's value (if
+            %% any) as the default for the app-env lookup.
+            Default = case Context of
+                          #{motd_file := File} -> File;
+                          _ -> undefined
+                      end,
+            string_from_app_env(motd_file, Default)
+    end.
+
+%% Returns the MOTD contents as a binary with trailing CR/LF stripped,
+%% or 'undefined' when no file is configured or it cannot be read.
+motd() ->
+    case motd_file() of
+        undefined ->
+            undefined;
+        File ->
+            case file:read_file(File) of
+                {ok, MOTD} -> string:trim(MOTD, trailing, [$\r,$\n]);
+                {error, _} -> undefined
+            end
+    end.
+
+%% Returns the home directory from the VM's -home argument.
+%% NOTE(review): on failure this returns init:get_argument/1's raw result
+%% (e.g. 'error') rather than a path -- callers presumably only display
+%% it; confirm before relying on the value.
+home_dir() ->
+    case init:get_argument(home) of
+        {ok, [[Home]]} -> Home;
+        Other -> Other
+    end.
+
+%% Thin delegate: the list of configuration files actually in use.
+config_files() ->
+    rabbit_config:config_files().
+
+%% We don't want this in fhc since it references rabbit stuff. And we can't put
+%% this in the bootstep directly.
+%% Starts the file_handle_cache under rabbit_sup, wiring its alarm
+%% callbacks to rabbit_alarm, then verifies it actually works.
+start_fhc() ->
+    ok = rabbit_sup:start_restartable_child(
+          file_handle_cache,
+          [fun rabbit_alarm:set_alarm/1, fun rabbit_alarm:clear_alarm/1]),
+    ensure_working_fhc().
+
+%% Smoke-tests the file handle cache by opening and reading one byte of a
+%% file guaranteed to exist (the kernel application's .app file).
+%% Throws {ensure_working_fhc, _} on failure or timeout.
+ensure_working_fhc() ->
+    %% To test the file handle cache, we simply read a file we know it
+    %% exists (Erlang kernel's .app file).
+    %%
+    %% To avoid any pollution of the application process' dictionary by
+    %% file_handle_cache, we spawn a separate process.
+    Parent = self(),
+    TestFun = fun() ->
+        %% Assumes fhc_read_buffering/fhc_write_buffering are always set
+        %% (e.g. via app defaults); an unset key crashes this fun, which
+        %% surfaces as the 'EXIT' branch below -- TODO confirm.
+        ReadBuf = case application:get_env(rabbit, fhc_read_buffering) of
+            {ok, true} -> "ON";
+            {ok, false} -> "OFF"
+        end,
+        WriteBuf = case application:get_env(rabbit, fhc_write_buffering) of
+            {ok, true} -> "ON";
+            {ok, false} -> "OFF"
+        end,
+        rabbit_log:info("FHC read buffering: ~s~n", [ReadBuf]),
+        rabbit_log:info("FHC write buffering: ~s~n", [WriteBuf]),
+        Filename = filename:join(code:lib_dir(kernel, ebin), "kernel.app"),
+        {ok, Fd} = file_handle_cache:open(Filename, [raw, binary, read], []),
+        {ok, _} = file_handle_cache:read(Fd, 1),
+        ok = file_handle_cache:close(Fd),
+        Parent ! fhc_ok
+    end,
+    %% NOTE(review): receiving {'EXIT', ...} below requires the caller to
+    %% trap exits; otherwise a crashing TestPid kills this process too.
+    TestPid = spawn_link(TestFun),
+    %% Because we are waiting for the test fun, abuse the
+    %% 'mnesia_table_loading_retry_timeout' parameter to find a sane timeout
+    %% value.
+    Timeout = rabbit_table:retry_timeout(),
+    receive
+        fhc_ok                       -> ok;
+        {'EXIT', TestPid, Exception} -> throw({ensure_working_fhc, Exception})
+    after Timeout ->
+            throw({ensure_working_fhc, {timeout, TestPid}})
+    end.
diff --git a/deps/rabbit/src/rabbit_access_control.erl b/deps/rabbit/src/rabbit_access_control.erl
new file mode 100644
index 0000000000..72260d5723
--- /dev/null
+++ b/deps/rabbit/src/rabbit_access_control.erl
@@ -0,0 +1,257 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_access_control).
+
+-include("rabbit.hrl").
+
+-export([check_user_pass_login/2, check_user_login/2, check_user_loopback/2,
+ check_vhost_access/4, check_resource_access/4, check_topic_access/4]).
+
+-export([permission_cache_can_expire/1, update_state/2]).
+
+%%----------------------------------------------------------------------------
+
+-export_type([permission_atom/0]).
+
+-type permission_atom() :: 'configure' | 'read' | 'write'.
+
+%%----------------------------------------------------------------------------
+
+-spec check_user_pass_login
+        (rabbit_types:username(), rabbit_types:password()) ->
+            {'ok', rabbit_types:user()} |
+            {'refused', rabbit_types:username(), string(), [any()]}.
+
+%% Convenience wrapper: authenticate with a password as the only
+%% credential, delegating to check_user_login/2.
+check_user_pass_login(Username, Password) ->
+    check_user_login(Username, [{password, Password}]).
+
+-spec check_user_login
+        (rabbit_types:username(), [{atom(), any()}]) ->
+            {'ok', rabbit_types:user()} |
+            {'refused', rabbit_types:username(), string(), [any()]}.
+
+%% Tries each backend from the `auth_backends` app env in order until one
+%% accepts the user. The fold's accumulator is either a {refused, ...}
+%% tuple (keep trying) or {ok, User} (short-circuit via the last clause).
+check_user_login(Username, AuthProps) ->
+    %% extra auth properties like MQTT client id are in AuthProps
+    {ok, Modules} = application:get_env(rabbit, auth_backends),
+    R = lists:foldl(
+          fun (rabbit_auth_backend_cache=ModN, {refused, _, _, _}) ->
+                  %% It is possible to specify authn/authz within the cache module settings,
+                  %% so we have to do both auth steps here
+                  %% See this rabbitmq-users discussion:
+                  %% https://groups.google.com/d/topic/rabbitmq-users/ObqM7MQdA3I/discussion
+                  try_authenticate_and_try_authorize(ModN, ModN, Username, AuthProps);
+              ({ModN, ModZs}, {refused, _, _, _}) ->
+                  %% Different modules for authN vs authZ. So authenticate
+                  %% with authN module, then if that succeeds do
+                  %% passwordless (i.e pre-authenticated) login with authZ.
+                  try_authenticate_and_try_authorize(ModN, ModZs, Username, AuthProps);
+              (Mod, {refused, _, _, _}) ->
+                  %% Same module for authN and authZ. Just take the result
+                  %% it gives us
+                  case try_authenticate(Mod, Username, AuthProps) of
+                      {ok, ModNUser = #auth_user{username = Username2, impl = Impl}} ->
+                          rabbit_log:debug("User '~s' authenticated successfully by backend ~s", [Username2, Mod]),
+                          user(ModNUser, {ok, [{Mod, Impl}], []});
+                      Else ->
+                          %% Fixed typo in the log message ("authenticatation").
+                          rabbit_log:debug("User '~s' failed authentication by backend ~s", [Username, Mod]),
+                          Else
+                  end;
+              (_, {ok, User}) ->
+                  %% We've successfully authenticated. Skip to the end...
+                  {ok, User}
+          end,
+          {refused, Username, "No modules checked '~s'", [Username]}, Modules),
+    R.
+
+%% Authenticates with ModN; on success, runs authorization against each
+%% module in ModZs0 (a single module is normalized to a one-element list)
+%% and merges the results via user/2. Any failure is returned as-is.
+try_authenticate_and_try_authorize(ModN, ModZs0, Username, AuthProps) ->
+    ModZs = case ModZs0 of
+                A when is_atom(A) -> [A];
+                L when is_list(L) -> L
+            end,
+    case try_authenticate(ModN, Username, AuthProps) of
+        {ok, ModNUser = #auth_user{username = Username2}} ->
+            rabbit_log:debug("User '~s' authenticated successfully by backend ~s", [Username2, ModN]),
+            user(ModNUser, try_authorize(ModZs, Username2, AuthProps));
+        Else ->
+            Else
+    end.
+
+%% Calls the backend's user_login_authentication/2, normalizing its
+%% {error, _} result into the module-wide {refused, Username, Fmt, Args}
+%% shape.
+try_authenticate(Module, Username, AuthProps) ->
+    case Module:user_login_authentication(Username, AuthProps) of
+        {ok, AuthUser}  -> {ok, AuthUser};
+        {error, E}      -> {refused, Username,
+                            "~s failed authenticating ~s: ~p~n",
+                            [Module, Username, E]};
+        {refused, F, A} -> {refused, Username, F, A}
+    end.
+
+%% Runs user_login_authorization/2 on every module, accumulating
+%% {Module, Impl} pairs and tags; the first failure aborts the fold
+%% (subsequent iterations just pass the refusal through). foldr keeps
+%% the pairs in the same order as Modules.
+try_authorize(Modules, Username, AuthProps) ->
+    lists:foldr(
+      fun (Module, {ok, ModsImpls, ModsTags}) ->
+              case Module:user_login_authorization(Username, AuthProps) of
+                  {ok, Impl, Tags}-> {ok, [{Module, Impl} | ModsImpls], ModsTags ++ Tags};
+                  {ok, Impl}      -> {ok, [{Module, Impl} | ModsImpls], ModsTags};
+                  {error, E}      -> {refused, Username,
+                                      "~s failed authorizing ~s: ~p~n",
+                                      [Module, Username, E]};
+                  {refused, F, A} -> {refused, Username, F, A}
+              end;
+          (_, {refused, F, A}) ->
+              {refused, Username, F, A}
+      end, {ok, [], []}, Modules).
+
+%% Combines an authenticated #auth_user{} with an authorization result
+%% into the final #user{} record; a failed authz result passes through.
+user(#auth_user{username = Username, tags = Tags}, {ok, ModZImpls, ModZTags}) ->
+    {ok, #user{username       = Username,
+               tags           = Tags ++ ModZTags,
+               authz_backends = ModZImpls}};
+user(_AuthUser, Error) ->
+    Error.
+
+%% Inverse of user/2 for a single backend: rebuilds the #auth_user{}
+%% handed to a backend's check_* callbacks, carrying that backend's Impl.
+auth_user(#user{username = Username, tags = Tags}, Impl) ->
+    #auth_user{username = Username,
+               tags     = Tags,
+               impl     = Impl}.
+
+-spec check_user_loopback
+        (rabbit_types:username(), rabbit_net:socket() | inet:ip_address()) ->
+            'ok' | 'not_allowed'.
+
+%% Users listed in the `loopback_users` app env may only connect over a
+%% loopback interface; everyone else is always allowed.
+check_user_loopback(Username, SockOrAddr) ->
+    {ok, Users} = application:get_env(rabbit, loopback_users),
+    case rabbit_net:is_loopback(SockOrAddr)
+        orelse not lists:member(Username, Users) of
+        true  -> ok;
+        false -> not_allowed
+    end.
+
+%% Normalizes the raw peer information into a #{peeraddr => ...} map for
+%% the authz backends, or 'undefined' when nothing is available.
+get_authz_data_from({ip, Address}) ->
+    #{peeraddr => Address};
+get_authz_data_from({socket, Sock}) ->
+    {ok, {Address, _Port}} = rabbit_net:peername(Sock),
+    #{peeraddr => Address};
+get_authz_data_from(undefined) ->
+    undefined.
+
+% Note: ip can be either a tuple or, a binary if reverse_dns_lookups
+% is enabled and it's a direct connection.
+-spec check_vhost_access(User :: rabbit_types:user(),
+                         VHostPath :: rabbit_types:vhost(),
+                         AuthzRawData :: {socket, rabbit_net:socket()} | {ip, inet:ip_address() | binary()} | undefined,
+                         AuthzContext :: map()) ->
+    'ok' | rabbit_types:channel_exit().
+%% Asks every authz backend recorded in the user whether it may access
+%% VHostPath; the vhost must also exist. The fold short-circuits: once a
+%% backend refuses (check_access raises a protocol error) or returns a
+%% non-ok, later backends are skipped.
+check_vhost_access(User = #user{username       = Username,
+                                authz_backends = Modules}, VHostPath, AuthzRawData, AuthzContext) ->
+    AuthzData = get_authz_data_from(AuthzRawData),
+    FullAuthzContext = create_vhost_access_authz_data(AuthzData, AuthzContext),
+    lists:foldl(
+      fun({Mod, Impl}, ok) ->
+              check_access(
+                fun() ->
+                        rabbit_vhost:exists(VHostPath) andalso
+                            Mod:check_vhost_access(
+                              auth_user(User, Impl), VHostPath, FullAuthzContext)
+                end,
+                Mod, "access to vhost '~s' refused for user '~s'",
+                [VHostPath, Username], not_allowed);
+         (_, Else) ->
+              Else
+      end, ok, Modules).
+
+%% Merges the peer-address map with the caller's context, avoiding empty
+%% containers: both absent -> undefined, otherwise the non-empty one (or
+%% the merge of both).
+create_vhost_access_authz_data(undefined, Context) when map_size(Context) == 0 ->
+    undefined;
+create_vhost_access_authz_data(undefined, Context) ->
+    Context;
+create_vhost_access_authz_data(PeerAddr, Context) when map_size(Context) == 0 ->
+    PeerAddr;
+create_vhost_access_authz_data(PeerAddr, Context) ->
+    maps:merge(PeerAddr, Context).
+
+-spec check_resource_access
+        (rabbit_types:user(), rabbit_types:r(atom()), permission_atom(), rabbit_types:authz_context()) ->
+            'ok' | rabbit_types:channel_exit().
+
+%% Checks Permission on Resource against every authz backend. The
+%% anonymous default exchange (empty name) is rewritten to the
+%% "amq.default" pseudo-name first so backends see a usable name.
+check_resource_access(User, R = #resource{kind = exchange, name = <<"">>},
+                      Permission, Context) ->
+    check_resource_access(User, R#resource{name = <<"amq.default">>},
+                          Permission, Context);
+check_resource_access(User = #user{username       = Username,
+                                   authz_backends = Modules},
+                      Resource, Permission, Context) ->
+    lists:foldl(
+      fun({Module, Impl}, ok) ->
+              check_access(
+                fun() -> Module:check_resource_access(
+                           auth_user(User, Impl), Resource, Permission, Context) end,
+                Module, "access to ~s refused for user '~s'",
+                [rabbit_misc:rs(Resource), Username]);
+         (_, Else) -> Else
+      end, ok, Modules).
+
+%% Like check_resource_access/4 but for topic authorization on a topic
+%% exchange; Context is expected to contain a routing_key entry (used in
+%% the error message).
+check_topic_access(User = #user{username = Username,
+                                authz_backends = Modules},
+                   Resource, Permission, Context) ->
+    lists:foldl(
+      fun({Module, Impl}, ok) ->
+              check_access(
+                fun() -> Module:check_topic_access(
+                           auth_user(User, Impl), Resource, Permission, Context) end,
+                Module, "access to topic '~s' in exchange ~s refused for user '~s'",
+                [maps:get(routing_key, Context), rabbit_misc:rs(Resource), Username]);
+         (_, Else) -> Else
+      end, ok, Modules).
+
+%% check_access/4: as check_access/5 with the generic 'access_refused'
+%% protocol error name.
+check_access(Fun, Module, ErrStr, ErrArgs) ->
+    check_access(Fun, Module, ErrStr, ErrArgs, access_refused).
+
+%% Runs the backend check Fun. true -> ok; false -> raise a protocol
+%% error named ErrName; {error, E} -> log the backend error, then raise
+%% the protocol error with the backend details appended.
+check_access(Fun, Module, ErrStr, ErrArgs, ErrName) ->
+    case Fun() of
+        true ->
+            ok;
+        false ->
+            rabbit_misc:protocol_error(ErrName, ErrStr, ErrArgs);
+        {error, E} ->
+            FullErrStr = ErrStr ++ ", backend ~s returned an error: ~p~n",
+            FullErrArgs = ErrArgs ++ [Module, E],
+            rabbit_log:error(FullErrStr, FullErrArgs),
+            rabbit_misc:protocol_error(ErrName, FullErrStr, FullErrArgs)
+    end.
+
+-spec update_state(User :: rabbit_types:user(), NewState :: term()) ->
+    {'ok', rabbit_types:auth_user()} |
+    {'refused', string()} |
+    {'error', any()}.
+
+%% Pushes NewState (e.g. refreshed credentials) into each authz backend
+%% that supports expiry (state_can_expire() =:= true), rebuilding the
+%% user's backend/Impl pairs; backends without expiry keep their Impl.
+%% The first error/refusal aborts the fold and is returned unchanged.
+update_state(User = #user{authz_backends = Backends0}, NewState) ->
+    %% N.B.: we use foldl/3 and prepending, so the final list of
+    %% backends is in reverse order from the original list.
+    Backends = lists:foldl(
+                 fun({Module, Impl}, {ok, Acc}) ->
+                         case Module:state_can_expire() of
+                             true  ->
+                                 case Module:update_state(auth_user(User, Impl), NewState) of
+                                     {ok, #auth_user{impl = Impl1}} ->
+                                         {ok, [{Module, Impl1} | Acc]};
+                                     Else -> Else
+                                 end;
+                             false ->
+                                 {ok, [{Module, Impl} | Acc]}
+                         end;
+                    (_, {error, _} = Err)      -> Err;
+                    (_, {refused, _, _} = Err) -> Err
+                 end, {ok, []}, Backends0),
+    case Backends of
+        %% Restore the original backend order (see note above).
+        {ok, Pairs} -> {ok, User#user{authz_backends = lists:reverse(Pairs)}};
+        Else        -> Else
+    end.
+
+-spec permission_cache_can_expire(User :: rabbit_types:user()) -> boolean().
+
+%% Returns true if any of the backends support credential expiration,
+%% otherwise returns false.
+permission_cache_can_expire(#user{authz_backends = Backends}) ->
+    lists:any(fun ({Module, _State}) -> Module:state_can_expire() end, Backends).
diff --git a/deps/rabbit/src/rabbit_alarm.erl b/deps/rabbit/src/rabbit_alarm.erl
new file mode 100644
index 0000000000..3f1ab7ae62
--- /dev/null
+++ b/deps/rabbit/src/rabbit_alarm.erl
@@ -0,0 +1,365 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+%% There are two types of alarms handled by this module:
+%%
+%% * per-node resource (disk, memory) alarms for the whole cluster. If any node
+%% has an alarm, then all publishing should be disabled across the
+%% cluster until all alarms clear. When a node sets such an alarm,
+%% this information is automatically propagated throughout the cluster.
+%% `#alarms.alarmed_nodes' is being used to track this type of alarms.
+%% * limits local to this node (file_descriptor_limit). Used for information
+%% purposes only: logging and getting node status. This information is not propagated
+%% throughout the cluster. `#alarms.alarms' is being used to track this type of alarms.
+%% @end
+
+-module(rabbit_alarm).
+
+-behaviour(gen_event).
+
+-export([start_link/0, start/0, stop/0, register/2, set_alarm/1,
+ clear_alarm/1, get_alarms/0, get_alarms/1, get_local_alarms/0, get_local_alarms/1, on_node_up/1, on_node_down/1,
+ format_as_map/1, format_as_maps/1, is_local/1]).
+
+-export([init/1, handle_call/2, handle_event/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-export([remote_conserve_resources/3]). %% Internal use only
+
+-define(SERVER, ?MODULE).
+
+-define(FILE_DESCRIPTOR_RESOURCE, <<"file descriptors">>).
+-define(MEMORY_RESOURCE, <<"memory">>).
+-define(DISK_SPACE_RESOURCE, <<"disk space">>).
+
+%%----------------------------------------------------------------------------
+
+-record(alarms, {alertees :: dict:dict(pid(), rabbit_types:mfargs()),
+ alarmed_nodes :: dict:dict(node(), [resource_alarm_source()]),
+ alarms :: [alarm()]}).
+
+-type local_alarm() :: 'file_descriptor_limit'.
+-type resource_alarm_source() :: 'disk' | 'memory'.
+-type resource_alarm() :: {resource_limit, resource_alarm_source(), node()}.
+-type alarm() :: local_alarm() | resource_alarm().
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+%% Starts the gen_event manager this module's handler attaches to.
+start_link() ->
+    gen_event:start_link({local, ?SERVER}).
+
+-spec start() -> 'ok'.
+
+%% Boot step: starts the event manager, installs this handler, then
+%% starts the memory and disk monitors that feed alarms into it. The
+%% memory-alarm callback runs a GC sweep before raising the alarm.
+start() ->
+    ok = rabbit_sup:start_restartable_child(?MODULE),
+    ok = gen_event:add_handler(?SERVER, ?MODULE, []),
+    {ok, MemoryWatermark} = application:get_env(vm_memory_high_watermark),
+
+    rabbit_sup:start_restartable_child(
+      vm_memory_monitor, [MemoryWatermark,
+                          fun (Alarm) ->
+                                  background_gc:run(),
+                                  set_alarm(Alarm)
+                          end,
+                          fun clear_alarm/1]),
+    {ok, DiskLimit} = application:get_env(disk_free_limit),
+    rabbit_sup:start_delayed_restartable_child(
+      rabbit_disk_monitor, [DiskLimit]),
+    ok.
+
+-spec stop() -> 'ok'.
+
+%% No-op: children started in start/0 are supervised and torn down with
+%% their supervisor.
+stop() -> ok.
+
+%% Registers a handler that should be called on every resource alarm change.
+%% Given a call rabbit_alarm:register(Pid, {M, F, A}), the handler would be
+%% called like this: `apply(M, F, A ++ [Pid, Source, Alert])', where `Source'
+%% has the type of resource_alarm_source() and `Alert' has the type of resource_alert().
+
+-spec register(pid(), rabbit_types:mfargs()) -> [atom()].
+
+%% Synchronous so the caller receives the currently-alarmed sources back.
+register(Pid, AlertMFA) ->
+    gen_event:call(?SERVER, ?MODULE, {register, Pid, AlertMFA}, infinity).
+
+-spec set_alarm({alarm(), []}) -> 'ok'.
+
+%% Raises an alarm; asynchronous (fire-and-forget notify).
+set_alarm(Alarm) -> gen_event:notify(?SERVER, {set_alarm, Alarm}).
+
+-spec clear_alarm(alarm()) -> 'ok'.
+
+%% Clears a previously-raised alarm; asynchronous.
+clear_alarm(Alarm) -> gen_event:notify(?SERVER, {clear_alarm, Alarm}).
+
+-spec get_alarms() -> [{alarm(), []}].
+%% All alarms: node-local ones plus resource alarms from every node.
+get_alarms() -> gen_event:call(?SERVER, ?MODULE, get_alarms, infinity).
+
+-spec get_alarms(timeout()) -> [{alarm(), []}].
+get_alarms(Timeout) -> gen_event:call(?SERVER, ?MODULE, get_alarms, Timeout).
+
+-spec get_local_alarms() -> [alarm()].
+%% Alarms relevant to this node only (see is_local/1).
+get_local_alarms() -> gen_event:call(?SERVER, ?MODULE, get_local_alarms, infinity).
+
+-spec get_local_alarms(timeout()) -> [alarm()].
+get_local_alarms(Timeout) -> gen_event:call(?SERVER, ?MODULE, get_local_alarms, Timeout).
+
+-spec filter_local_alarms([alarm()]) -> [alarm()].
+filter_local_alarms(Alarms) ->
+    lists:filter(fun is_local/1, Alarms).
+
+-spec is_local({alarm(), any()}) -> boolean().
+%% file_descriptor_limit is always local; resource alarms are local only
+%% when raised by this node.
+is_local({file_descriptor_limit, _}) -> true;
+is_local({{resource_limit, _Resource, Node}, _}) when Node =:= node() -> true;
+is_local({{resource_limit, _Resource, Node}, _}) when Node =/= node() -> false.
+
+-spec format_as_map(alarm()) -> #{binary() => term()}.
+%% Renders one alarm as a #{<<"resource">>, <<"node">>} map for
+%% API/CLI consumption; unknown resource atoms are coerced to binaries.
+format_as_map(file_descriptor_limit) ->
+    #{
+        <<"resource">> => ?FILE_DESCRIPTOR_RESOURCE,
+        <<"node">> => node()
+    };
+format_as_map({resource_limit, disk, Node}) ->
+    #{
+        <<"resource">> => ?DISK_SPACE_RESOURCE,
+        <<"node">> => Node
+    };
+format_as_map({resource_limit, memory, Node}) ->
+    #{
+        <<"resource">> => ?MEMORY_RESOURCE,
+        <<"node">> => Node
+    };
+format_as_map({resource_limit, Limit, Node}) ->
+    #{
+        <<"resource">> => rabbit_data_coercion:to_binary(Limit),
+        <<"node">> => Node
+    }.
+
+-spec format_as_maps([{alarm(), []}]) -> [#{any() => term()}].
+%% Accepts both {Alarm, Proplist} pairs and bare alarms.
+format_as_maps(Alarms) when is_list(Alarms) ->
+    %% get_alarms/0 returns
+    %%
+    %% [
+    %%  {file_descriptor_limit, []},
+    %%  {{resource_limit, disk, rabbit@warp10}, []},
+    %%  {{resource_limit, memory, rabbit@warp10}, []}
+    %% ]
+    lists:map(fun({Resource, _}) -> format_as_map(Resource);
+                 (Resource)      -> format_as_map(Resource)
+              end, Alarms).
+
+
+-spec on_node_up(node()) -> 'ok'.
+%% Cluster membership hooks; handled in handle_event/2 below.
+on_node_up(Node)   -> gen_event:notify(?SERVER, {node_up,   Node}).
+
+-spec on_node_down(node()) -> 'ok'.
+on_node_down(Node) -> gen_event:notify(?SERVER, {node_down, Node}).
+
+%% Alert callback registered on remote nodes (see the node_up handler):
+%% relays this node's resource state changes to Pid's node by raising or
+%% clearing a {resource_limit, Source, node()} alarm there.
+remote_conserve_resources(Pid, Source, {true, _, _}) ->
+    gen_event:notify({?SERVER, node(Pid)},
+                     {set_alarm, {{resource_limit, Source, node()}, []}});
+remote_conserve_resources(Pid, Source, {false, _, _}) ->
+    gen_event:notify({?SERVER, node(Pid)},
+                     {clear_alarm, {resource_limit, Source, node()}}).
+
+
+%%----------------------------------------------------------------------------
+
+%% gen_event callback: empty state, no alertees, no alarms.
+init([]) ->
+    {ok, #alarms{alertees      = dict:new(),
+                 alarmed_nodes = dict:new(),
+                 alarms        = []}}.
+
+%% register replies with the deduplicated union of alarm sources across
+%% all alarmed nodes, so the new alertee knows the current state.
+handle_call({register, Pid, AlertMFA}, State = #alarms{alarmed_nodes = AN}) ->
+    {ok, lists:usort(lists:append([V || {_, V} <- dict:to_list(AN)])),
+     internal_register(Pid, AlertMFA, State)};
+
+handle_call(get_alarms, State) ->
+    {ok, compute_alarms(State), State};
+
+handle_call(get_local_alarms, State) ->
+    {ok, filter_local_alarms(compute_alarms(State)), State};
+
+handle_call(_Request, State) ->
+    {ok, not_understood, State}.
+
+%% Resource alarms (per-node, cluster-wide effect) are matched first;
+%% anything else falls to the generic clauses using #alarms.alarms.
+%% Setting/clearing is idempotent: already-set / not-set alarms are
+%% ignored without notifying.
+handle_event({set_alarm, {{resource_limit, Source, Node}, []}}, State) ->
+    case is_node_alarmed(Source, Node, State) of
+        true ->
+            {ok, State};
+        false ->
+            rabbit_event:notify(alarm_set, [{source, Source},
+                                            {node, Node}]),
+            handle_set_resource_alarm(Source, Node, State)
+    end;
+handle_event({set_alarm, Alarm}, State = #alarms{alarms = Alarms}) ->
+    case lists:member(Alarm, Alarms) of
+        true  -> {ok, State};
+        false -> UpdatedAlarms = lists:usort([Alarm|Alarms]),
+                 handle_set_alarm(Alarm, State#alarms{alarms = UpdatedAlarms})
+    end;
+
+handle_event({clear_alarm, {resource_limit, Source, Node}}, State) ->
+    case is_node_alarmed(Source, Node, State) of
+        true  ->
+            rabbit_event:notify(alarm_cleared, [{source, Source},
+                                                {node, Node}]),
+            handle_clear_resource_alarm(Source, Node, State);
+        false ->
+            {ok, State}
+    end;
+handle_event({clear_alarm, Alarm}, State = #alarms{alarms = Alarms}) ->
+    %% Note: local alarms are stored as {Name, Info} pairs, hence the
+    %% keymember/keydelete on element 1.
+    case lists:keymember(Alarm, 1, Alarms) of
+        true  -> handle_clear_alarm(
+                   Alarm, State#alarms{alarms = lists:keydelete(
+                                                  Alarm, 1, Alarms)});
+        false -> {ok, State}
+
+    end;
+
+handle_event({node_up, Node}, State) ->
+    %% Must do this via notify and not call to avoid possible deadlock.
+    ok = gen_event:notify(
+           {?SERVER, Node},
+           {register, self(), {?MODULE, remote_conserve_resources, []}}),
+    {ok, State};
+
+handle_event({node_down, Node}, #alarms{alarmed_nodes = AN} = State) ->
+    %% A dead node cannot hold resources: clear each of its alarms.
+    AlarmsForDeadNode = case dict:find(Node, AN) of
+                            {ok, V} -> V;
+                            error   -> []
+                        end,
+    {ok, lists:foldr(fun(Source, AccState) ->
+                             rabbit_log:warning("~s resource limit alarm cleared for dead node ~p~n",
+                                                [Source, Node]),
+                             maybe_alert(fun dict_unappend/3, Node, Source, false, AccState)
+                     end, State, AlarmsForDeadNode)};
+
+handle_event({register, Pid, AlertMFA}, State) ->
+    {ok, internal_register(Pid, AlertMFA, State)};
+
+handle_event(_Event, State) ->
+    {ok, State}.
+
+%% Alertees are monitored (see internal_register/3); drop dead ones.
+handle_info({'DOWN', _MRef, process, Pid, _Reason},
+            State = #alarms{alertees = Alertees}) ->
+    {ok, State#alarms{alertees = dict:erase(Pid, Alertees)}};
+
+handle_info(_Info, State) ->
+    {ok, State}.
+
+terminate(_Arg, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+
+%% Adds Val to the (deduplicated, sorted) list stored under Key,
+%% creating the entry if absent.
+dict_append(Key, Val, Dict) ->
+    L = case dict:find(Key, Dict) of
+            {ok, V} -> V;
+            error   -> []
+        end,
+    dict:store(Key, lists:usort([Val|L]), Dict).
+
+%% Removes Val from the list stored under Key; the entry is deleted
+%% entirely when the list becomes empty.
+dict_unappend(Key, Val, Dict) ->
+    L = case dict:find(Key, Dict) of
+            {ok, V} -> V;
+            error   -> []
+        end,
+
+    case lists:delete(Val, L) of
+        [] -> dict:erase(Key, Dict);
+        X  -> dict:store(Key, X, Dict)
+    end.
+
+%% Applies UpdateFun (dict_append/dict_unappend) to the alarmed-nodes
+%% map, then notifies alertees with {WasAlertAdded, StillHasAlerts, Node}.
+%% Remote alertees are only told about alarms originating on this node;
+%% local alertees hear about everything.
+maybe_alert(UpdateFun, Node, Source, WasAlertAdded,
+            State = #alarms{alarmed_nodes = AN,
+                            alertees      = Alertees}) ->
+    AN1 = UpdateFun(Node, Source, AN),
+    %% Is alarm for Source still set on any node?
+    StillHasAlerts = lists:any(fun ({_Node, NodeAlerts}) -> lists:member(Source, NodeAlerts) end, dict:to_list(AN1)),
+    case StillHasAlerts of
+        true -> ok;
+        false -> rabbit_log:warning("~s resource limit alarm cleared across the cluster~n", [Source])
+    end,
+    Alert = {WasAlertAdded, StillHasAlerts, Node},
+    case node() of
+        Node -> ok = alert_remote(Alert, Alertees, Source);
+        _    -> ok
+    end,
+    ok = alert_local(Alert, Alertees, Source),
+    State#alarms{alarmed_nodes = AN1}.
+
+%% Notify only alertees on this node.
+alert_local(Alert, Alertees, Source) ->
+    alert(Alertees, Source, Alert, fun erlang:'=:='/2).
+
+%% Notify only alertees on other nodes.
+alert_remote(Alert, Alertees, Source) ->
+    alert(Alertees, Source, Alert, fun erlang:'=/='/2).
+
+%% Calls each alertee's registered {M, F, A} with [Pid, Source, Alert]
+%% appended, filtered by comparing the alertee's node to ours.
+alert(Alertees, Source, Alert, NodeComparator) ->
+    Node = node(),
+    dict:fold(fun (Pid, {M, F, A}, ok) ->
+                      case NodeComparator(Node, node(Pid)) of
+                          true  -> apply(M, F, A ++ [Pid, Source, Alert]);
+                          false -> ok
+                      end
+              end, ok, Alertees).
+
+%% Stores Pid's alert callback and monitors Pid (cleanup in handle_info).
+%% If this node currently has alarms, the new alertee is immediately
+%% called once per alarmed source so it starts from the right state.
+internal_register(Pid, {M, F, A} = AlertMFA,
+                  State = #alarms{alertees = Alertees}) ->
+    _MRef = erlang:monitor(process, Pid),
+    case dict:find(node(), State#alarms.alarmed_nodes) of
+        {ok, Sources} -> [apply(M, F, A ++ [Pid, R, {true, true, node()}]) || R <- Sources];
+        error         -> ok
+    end,
+    NewAlertees = dict:store(Pid, AlertMFA, Alertees),
+    State#alarms{alertees = NewAlertees}.
+
+%% Logs the banner and records the resource alarm for Node.
+handle_set_resource_alarm(Source, Node, State) ->
+    rabbit_log:warning(
+      "~s resource limit alarm set on node ~p.~n~n"
+      "**********************************************************~n"
+      "*** Publishers will be blocked until this alarm clears ***~n"
+      "**********************************************************~n",
+      [Source, Node]),
+    {ok, maybe_alert(fun dict_append/3, Node, Source, true, State)}.
+
+%% Local (non-resource) alarms: log only; state was already updated by
+%% the caller (handle_event).
+handle_set_alarm({file_descriptor_limit, []}, State) ->
+    rabbit_log:warning(
+      "file descriptor limit alarm set.~n~n"
+      "********************************************************************~n"
+      "*** New connections will not be accepted until this alarm clears ***~n"
+      "********************************************************************~n"),
+    {ok, State};
+handle_set_alarm(Alarm, State) ->
+    rabbit_log:warning("alarm '~p' set~n", [Alarm]),
+    {ok, State}.
+
+%% Logs and removes the resource alarm for Node.
+handle_clear_resource_alarm(Source, Node, State) ->
+    rabbit_log:warning("~s resource limit alarm cleared on node ~p~n",
+                       [Source, Node]),
+    {ok, maybe_alert(fun dict_unappend/3, Node, Source, false, State)}.
+
+handle_clear_alarm(file_descriptor_limit, State) ->
+    rabbit_log:warning("file descriptor limit alarm cleared~n"),
+    {ok, State};
+handle_clear_alarm(Alarm, State) ->
+    rabbit_log:warning("alarm '~p' cleared~n", [Alarm]),
+    {ok, State}.
+
+%% True when Node currently has a resource alarm for Source.
+is_node_alarmed(Source, Node, #alarms{alarmed_nodes = AN}) ->
+    case dict:find(Node, AN) of
+        {ok, Sources} ->
+            lists:member(Source, Sources);
+        error ->
+            false
+    end.
+
+%% Full alarm list: local alarms plus one {{resource_limit, Source,
+%% Node}, []} entry per (node, source) pair in alarmed_nodes.
+compute_alarms(#alarms{alarms = Alarms,
+                       alarmed_nodes = AN}) ->
+    Alarms ++ [ {{resource_limit, Source, Node}, []}
+                || {Node, Sources} <- dict:to_list(AN), Source <- Sources ].
diff --git a/deps/rabbit/src/rabbit_amqqueue.erl b/deps/rabbit/src/rabbit_amqqueue.erl
new file mode 100644
index 0000000000..cd5f894680
--- /dev/null
+++ b/deps/rabbit/src/rabbit_amqqueue.erl
@@ -0,0 +1,1889 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_amqqueue).
+
+-export([warn_file_limit/0]).
+-export([recover/1, stop/1, start/1, declare/6, declare/7,
+ delete_immediately/1, delete_exclusive/2, delete/4, purge/1,
+ forget_all_durable/1]).
+-export([pseudo_queue/2, pseudo_queue/3, immutable/1]).
+-export([lookup/1, lookup_many/1, not_found_or_absent/1, not_found_or_absent_dirty/1,
+ with/2, with/3, with_or_die/2,
+ assert_equivalence/5,
+ check_exclusive_access/2, with_exclusive_access_or_die/3,
+ stat/1, deliver/2,
+ requeue/3, ack/3, reject/4]).
+-export([not_found/1, absent/2]).
+-export([list/0, list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2,
+ emit_info_all/5, list_local/1, info_local/1,
+ emit_info_local/4, emit_info_down/4]).
+-export([count/0]).
+-export([list_down/1, count/1, list_names/0, list_names/1, list_local_names/0,
+ list_local_names_down/0, list_with_possible_retry/1]).
+-export([list_by_type/1, sample_local_queues/0, sample_n_by_name/2, sample_n/2]).
+-export([force_event_refresh/1, notify_policy_changed/1]).
+-export([consumers/1, consumers_all/1, emit_consumers_all/4, consumer_info_keys/0]).
+-export([basic_get/5, basic_consume/12, basic_cancel/5, notify_decorators/1]).
+-export([notify_sent/2, notify_sent_queue_down/1, resume/2]).
+-export([notify_down_all/2, notify_down_all/3, activate_limit_all/2, credit/5]).
+-export([on_node_up/1, on_node_down/1]).
+-export([update/2, store_queue/1, update_decorators/1, policy_changed/2]).
+-export([update_mirroring/1, sync_mirrors/1, cancel_sync_mirrors/1]).
+-export([emit_unresponsive/6, emit_unresponsive_local/5, is_unresponsive/2]).
+-export([has_synchronised_mirrors_online/1]).
+-export([is_replicated/1, is_exclusive/1, is_not_exclusive/1, is_dead_exclusive/1]).
+-export([list_local_quorum_queues/0, list_local_quorum_queue_names/0,
+ list_local_mirrored_classic_queues/0, list_local_mirrored_classic_names/0,
+ list_local_leaders/0, list_local_followers/0, get_quorum_nodes/1,
+ list_local_mirrored_classic_without_synchronised_mirrors/0,
+ list_local_mirrored_classic_without_synchronised_mirrors_for_cli/0]).
+-export([ensure_rabbit_queue_record_is_initialized/1]).
+-export([format/1]).
+-export([delete_immediately_by_resource/1]).
+-export([delete_crashed/1,
+ delete_crashed/2,
+ delete_crashed_internal/2]).
+
+-export([pid_of/1, pid_of/2]).
+-export([mark_local_durable_queues_stopped/1]).
+
+-export([rebalance/3]).
+-export([collect_info_all/2]).
+
+-export([is_policy_applicable/2]).
+-export([is_server_named_allowed/1]).
+
+-export([check_max_age/1]).
+-export([get_queue_type/1]).
+
+%% internal
+-export([internal_declare/2, internal_delete/2, run_backing_queue/3,
+ set_ram_duration_target/2, set_maximum_since_use/2,
+ emit_consumers_local/3, internal_delete/3]).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include_lib("stdlib/include/qlc.hrl").
+-include("amqqueue.hrl").
+
+-define(INTEGER_ARG_TYPES, [byte, short, signedint, long,
+ unsignedbyte, unsignedshort, unsignedint]).
+
+-define(MORE_CONSUMER_CREDIT_AFTER, 50).
+
+-define(IS_CLASSIC(QPid), is_pid(QPid)).
+-define(IS_QUORUM(QPid), is_tuple(QPid)).
+%%----------------------------------------------------------------------------
+
+-export_type([name/0, qmsg/0, absent_reason/0]).
+
+-type name() :: rabbit_types:r('queue').
+
+-type qpids() :: [pid()].
+-type qlen() :: rabbit_types:ok(non_neg_integer()).
+-type qfun(A) :: fun ((amqqueue:amqqueue()) -> A | no_return()).
+-type qmsg() :: {name(), pid() | {atom(), pid()}, msg_id(),
+ boolean(), rabbit_types:message()}.
+-type msg_id() :: non_neg_integer().
+-type ok_or_errors() ::
+ 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}.
+-type absent_reason() :: 'nodedown' | 'crashed' | stopped | timeout.
+-type queue_not_found() :: not_found.
+-type queue_absent() :: {'absent', amqqueue:amqqueue(), absent_reason()}.
+-type not_found_or_absent() :: queue_not_found() | queue_absent().
+
+%%----------------------------------------------------------------------------
+
+-define(CONSUMER_INFO_KEYS,
+ [queue_name, channel_pid, consumer_tag, ack_required, prefetch_count,
+ active, activity_status, arguments]).
+
+%% Warn when the number of queues to recover exceeds the file handle
+%% limit: with too few handles recovery might hang.
+warn_file_limit() ->
+    DurableQueues = find_recoverable_queues(),
+    L = length(DurableQueues),
+    %% Read the limit once so the comparison and the logged value
+    %% cannot disagree (previously get_limit/0 was called twice).
+    Limit = file_handle_cache:get_limit(),
+
+    %% if there are not enough file handles, the server might hang
+    %% when trying to recover queues, warn the user:
+    case Limit < L of
+        true ->
+            rabbit_log:warning(
+              "Recovering ~p queues, available file handles: ~p. Please increase max open file handles limit to at least ~p!~n",
+              [L, Limit, L]);
+        false ->
+            ok
+    end.
+
+-spec recover(rabbit_types:vhost()) ->
+    {Recovered :: [amqqueue:amqqueue()],
+     Failed :: [amqqueue:amqqueue()]}.
+%% Recover the recoverable durable queues of VHost, delegating the
+%% actual work to the queue-type implementation.
+recover(VHost) ->
+    AllDurable = find_local_durable_queues(VHost),
+    rabbit_queue_type:recover(VHost, AllDurable).
+
+%% Partition queue pids into {classic, quorum}: classic queue pids are
+%% plain pids, quorum leaders are {Name, Node} tuples (?IS_CLASSIC).
+filter_pid_per_type(QPids) ->
+    lists:partition(fun(QPid) -> ?IS_CLASSIC(QPid) end, QPids).
+
+%% Resolve each resource to its queue pid and partition the resulting
+%% {Resource, QPid} pairs into {classic, quorum} by pid shape.
+filter_resource_per_type(Resources) ->
+    Queues = [begin
+                  {ok, Q} = lookup(Resource),
+                  QPid = amqqueue:get_pid(Q),
+                  {Resource, QPid}
+              end || Resource <- Resources],
+    lists:partition(fun({_Resource, QPid}) -> ?IS_CLASSIC(QPid) end, Queues).
+
+-spec stop(rabbit_types:vhost()) -> 'ok'.
+%% Stop all queues of a vhost: the classic queue supervisor tree, the
+%% configured backing queue module, then the vhost's quorum queues.
+stop(VHost) ->
+    %% Classic queues
+    ok = rabbit_amqqueue_sup_sup:stop_for_vhost(VHost),
+    {ok, BQ} = application:get_env(rabbit, backing_queue_module),
+    ok = BQ:stop(VHost),
+    rabbit_quorum_queue:stop(VHost).
+
+-spec start([amqqueue:amqqueue()]) -> 'ok'.
+
+%% Send the 'go' signal to recovered classic queue processes so they
+%% finish their deferred initialisation.
+start(Qs) ->
+    %% At this point all recovered queues and their bindings are
+    %% visible to routing, so now it is safe for them to complete
+    %% their initialisation (which may involve interacting with other
+    %% queues).
+    _ = [amqqueue:get_pid(Q) ! {self(), go}
+         || Q <- Qs,
+            %% All queues are supposed to be classic here.
+            amqqueue:is_classic(Q)],
+    ok.
+
+%% Persist state 'stopped' for VHost's durable classic queues, with
+%% one retry after an amqqueue record upgrade (see the macro).
+mark_local_durable_queues_stopped(VHost) ->
+    ?try_mnesia_tx_or_upgrade_amqqueue_and_retry(
+       do_mark_local_durable_queues_stopped(VHost),
+       do_mark_local_durable_queues_stopped(VHost)).
+
+do_mark_local_durable_queues_stopped(VHost) ->
+    Qs = find_local_durable_queues(VHost),
+    rabbit_misc:execute_mnesia_transaction(
+      fun() ->
+              [ store_queue(amqqueue:set_state(Q, stopped))
+                || Q <- Qs, amqqueue:get_type(Q) =:= rabbit_classic_queue,
+                   amqqueue:get_state(Q) =/= stopped ]
+      end).
+
+%% Durable queues of VHost that their queue type deems recoverable
+%% (dirty qlc scan over rabbit_durable_queue).
+find_local_durable_queues(VHost) ->
+    mnesia:async_dirty(
+      fun () ->
+              qlc:e(
+                qlc:q(
+                  [Q || Q <- mnesia:table(rabbit_durable_queue),
+                        amqqueue:get_vhost(Q) =:= VHost andalso
+                        rabbit_queue_type:is_recoverable(Q)
+                  ]))
+      end).
+
+%% As find_local_durable_queues/1 but across all vhosts.
+find_recoverable_queues() ->
+    mnesia:async_dirty(
+      fun () ->
+              qlc:e(qlc:q([Q || Q <- mnesia:table(rabbit_durable_queue),
+                                rabbit_queue_type:is_recoverable(Q)]))
+      end).
+
+-spec declare(name(),
+              boolean(),
+              boolean(),
+              rabbit_framing:amqp_table(),
+              rabbit_types:maybe(pid()),
+              rabbit_types:username()) ->
+    {'new' | 'existing' | 'owner_died', amqqueue:amqqueue()} |
+    {'new', amqqueue:amqqueue(), rabbit_fifo_client:state()} |
+    {'absent', amqqueue:amqqueue(), absent_reason()} |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+%% Declare a queue with the local node as the suggested home node.
+declare(QueueName, Durable, AutoDelete, Args, Owner, ActingUser) ->
+    declare(QueueName, Durable, AutoDelete, Args, Owner, ActingUser, node()).
+
+
+%% The Node argument suggests where the queue (master if mirrored)
+%% should be. Note that in some cases (e.g. with "nodes" policy in
+%% effect) this might not be possible to satisfy.
+
+-spec declare(name(),
+              boolean(),
+              boolean(),
+              rabbit_framing:amqp_table(),
+              rabbit_types:maybe(pid()),
+              rabbit_types:username(),
+              node()) ->
+    {'new' | 'existing' | 'owner_died', amqqueue:amqqueue()} |
+    {'absent', amqqueue:amqqueue(), absent_reason()} |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+%% Validate the x-arguments, build the amqqueue record (policy and
+%% decorators applied) and hand the declaration to the queue type; a
+%% disabled queue-type feature flag yields a protocol_error tuple.
+declare(QueueName = #resource{virtual_host = VHost}, Durable, AutoDelete, Args,
+        Owner, ActingUser, Node) ->
+    ok = check_declare_arguments(QueueName, Args),
+    Type = get_queue_type(Args),
+    case rabbit_queue_type:is_enabled(Type) of
+        true ->
+            Q0 = amqqueue:new(QueueName,
+                              none,
+                              Durable,
+                              AutoDelete,
+                              Owner,
+                              Args,
+                              VHost,
+                              #{user => ActingUser},
+                              Type),
+            Q = rabbit_queue_decorator:set(
+                  rabbit_policy:set(Q0)),
+            rabbit_queue_type:declare(Q, Node);
+        false ->
+            {protocol_error, internal_error,
+             "Cannot declare a queue '~s' of type '~s' on node '~s': "
+             "the corresponding feature flag is disabled",
+             [rabbit_misc:rs(QueueName), Type, Node]}
+    end.
+
+%% Queue type selected by the x-queue-type argument, falling back to
+%% the configured default when the argument is absent.
+get_queue_type(Args) ->
+    case rabbit_misc:table_lookup(Args, <<"x-queue-type">>) of
+        {_ArgType, TypeName} ->
+            rabbit_queue_type:discover(TypeName);
+        undefined ->
+            rabbit_queue_type:default()
+    end.
+
+-spec internal_declare(amqqueue:amqqueue(), boolean()) ->
+    {created | existing, amqqueue:amqqueue()} | queue_absent().
+
+%% Write the queue record(s) to mnesia, retrying with an upgraded
+%% amqqueue record if the first attempt fails (see the macro).
+internal_declare(Q, Recover) ->
+    ?try_mnesia_tx_or_upgrade_amqqueue_and_retry(
+       do_internal_declare(Q, Recover),
+       begin
+           Q1 = amqqueue:upgrade(Q),
+           do_internal_declare(Q1, Recover)
+       end).
+
+%% Recover = true: unconditionally (re)store the queue as 'live'.
+do_internal_declare(Q, true) ->
+    rabbit_misc:execute_mnesia_tx_with_tail(
+      fun () ->
+              ok = store_queue(amqqueue:set_state(Q, live)),
+              rabbit_misc:const({created, Q})
+      end);
+%% Recover = false: create only when no live record exists; an
+%% existing queue, or one absent on a down node, is reported as such.
+do_internal_declare(Q, false) ->
+    QueueName = amqqueue:get_name(Q),
+    rabbit_misc:execute_mnesia_tx_with_tail(
+      fun () ->
+              case mnesia:wread({rabbit_queue, QueueName}) of
+                  [] ->
+                      case not_found_or_absent(QueueName) of
+                          not_found           -> Q1 = rabbit_policy:set(Q),
+                                                 Q2 = amqqueue:set_state(Q1, live),
+                                                 ok = store_queue(Q2),
+                                                 fun () -> {created, Q2} end;
+                          {absent, _Q, _} = R -> rabbit_misc:const(R)
+                      end;
+                  [ExistingQ] ->
+                      rabbit_misc:const({existing, ExistingQ})
+              end
+      end).
+
+-spec update
+        (name(), fun((amqqueue:amqqueue()) -> amqqueue:amqqueue())) ->
+            'not_found' | amqqueue:amqqueue().
+
+%% Apply Fun to the stored record of Name and write the result back
+%% (also to the durable table for durable queues). Uses mnesia:wread,
+%% so it must run inside a mnesia transaction.
+update(Name, Fun) ->
+    case mnesia:wread({rabbit_queue, Name}) of
+        [Q] ->
+            Durable = amqqueue:is_durable(Q),
+            Q1 = Fun(Q),
+            ok = mnesia:write(rabbit_queue, Q1, write),
+            case Durable of
+                true -> ok = mnesia:write(rabbit_durable_queue, Q1, write);
+                _    -> ok
+            end,
+            Q1;
+        [] ->
+            not_found
+    end.
+
+%% only really used for quorum queues to ensure the rabbit_queue record
+%% is initialised
+ensure_rabbit_queue_record_is_initialized(Q) ->
+    ?try_mnesia_tx_or_upgrade_amqqueue_and_retry(
+       do_ensure_rabbit_queue_record_is_initialized(Q),
+       begin
+           Q1 = amqqueue:upgrade(Q),
+           do_ensure_rabbit_queue_record_is_initialized(Q1)
+       end).
+
+do_ensure_rabbit_queue_record_is_initialized(Q) ->
+    rabbit_misc:execute_mnesia_tx_with_tail(
+      fun () ->
+              ok = store_queue(Q),
+              rabbit_misc:const({ok, Q})
+      end).
+
+-spec store_queue(amqqueue:amqqueue()) -> 'ok'.
+
+%% Persist a queue record: durable queues go to rabbit_durable_queue
+%% (with transient mirroring/decorator fields reset first) as well as
+%% to the RAM table. Must run inside a mnesia transaction.
+store_queue(Q) when ?amqqueue_is_durable(Q) ->
+    Q1 = amqqueue:reset_mirroring_and_decorators(Q),
+    ok = mnesia:write(rabbit_durable_queue, Q1, write),
+    store_queue_ram(Q);
+store_queue(Q) when not ?amqqueue_is_durable(Q) ->
+    store_queue_ram(Q).
+
+%% Write to the RAM table with freshly computed decorators.
+store_queue_ram(Q) ->
+    ok = mnesia:write(rabbit_queue, rabbit_queue_decorator:set(Q), write).
+
+-spec update_decorators(name()) -> 'ok'.
+
+%% Recompute and store the decorators of the queue's RAM record;
+%% silently a no-op when the queue does not exist.
+update_decorators(Name) ->
+    rabbit_misc:execute_mnesia_transaction(
+      fun() ->
+              case mnesia:wread({rabbit_queue, Name}) of
+                  [Q] -> store_queue_ram(Q),
+                         ok;
+                  [] -> ok
+              end
+      end).
+
+-spec policy_changed(amqqueue:amqqueue(), amqqueue:amqqueue()) ->
+    'ok'.
+
+%% React to a policy change from Q1 to Q2: update mirroring, notify
+%% every decorator that was or is selected, and emit a stats event.
+policy_changed(Q1, Q2) ->
+    Decorators1 = amqqueue:get_decorators(Q1),
+    Decorators2 = amqqueue:get_decorators(Q2),
+    rabbit_mirror_queue_misc:update_mirrors(Q1, Q2),
+    D1 = rabbit_queue_decorator:select(Decorators1),
+    D2 = rabbit_queue_decorator:select(Decorators2),
+    [ok = M:policy_changed(Q1, Q2) || M <- lists:usort(D1 ++ D2)],
+    %% Make sure we emit a stats event even if nothing
+    %% mirroring-related has changed - the policy may have changed anyway.
+    notify_policy_changed(Q2).
+
+%% Whether Policy may be applied to the named queue; when the queue
+%% cannot be looked up we default to 'true' (apply always).
+is_policy_applicable(QName, Policy) ->
+    case lookup(QName) of
+        {ok, Q} ->
+            rabbit_queue_type:is_policy_applicable(Q, Policy);
+        _ ->
+            %% Defaults to previous behaviour. Apply always
+            true
+    end.
+
+%% Whether queues of the type selected by Args may be server-named.
+is_server_named_allowed(Args) ->
+    Type = get_queue_type(Args),
+    rabbit_queue_type:is_server_named_allowed(Type).
+
+-spec lookup
+        (name()) ->
+            rabbit_types:ok(amqqueue:amqqueue()) |
+            rabbit_types:error('not_found');
+        ([name()]) ->
+            [amqqueue:amqqueue()].
+
+%% Single name: {ok, Q} | {error, not_found}. List of names: the
+%% records that exist, missing ones silently skipped.
+lookup([])     -> [];                             %% optimisation
+lookup([Name]) -> ets:lookup(rabbit_queue, Name); %% optimisation
+lookup(Names) when is_list(Names) ->
+    %% Normally we'd call mnesia:dirty_read/1 here, but that is quite
+    %% expensive for reasons explained in rabbit_misc:dirty_read/1.
+    lists:append([ets:lookup(rabbit_queue, Name) || Name <- Names]);
+lookup(Name) ->
+    rabbit_misc:dirty_read({rabbit_queue, Name}).
+
+-spec lookup_many ([name()]) -> [amqqueue:amqqueue()].
+
+%% List-only alias of lookup/1.
+lookup_many(Names) when is_list(Names) ->
+    lookup(Names).
+
+-spec not_found_or_absent(name()) -> not_found_or_absent().
+
+%% Distinguish "never existed" from "durable queue whose node is
+%% down"; uses mnesia:read, so it must run inside a transaction.
+not_found_or_absent(Name) ->
+    %% NB: we assume that the caller has already performed a lookup on
+    %% rabbit_queue and not found anything
+    case mnesia:read({rabbit_durable_queue, Name}) of
+        [] -> not_found;
+        [Q] -> {absent, Q, nodedown} %% Q exists on stopped node
+    end.
+
+-spec not_found_or_absent_dirty(name()) -> not_found_or_absent().
+
+%% Dirty-read variant of not_found_or_absent/1.
+not_found_or_absent_dirty(Name) ->
+    %% We should read from both tables inside a tx, to get a
+    %% consistent view. But the chances of an inconsistency are small,
+    %% and only affect the error kind.
+    case rabbit_misc:dirty_read({rabbit_durable_queue, Name}) of
+        {error, not_found} -> not_found;
+        {ok, Q} -> {absent, Q, nodedown}
+    end.
+
+-spec get_rebalance_lock(pid()) ->
+    {true, {rebalance_queues, pid()}} | false.
+%% Try once (no retries) to take the cluster-wide rebalance lock on
+%% behalf of Pid. We want to know immediately whether a re-balance is
+%% already taking place and stop accordingly.
+get_rebalance_lock(Pid) when is_pid(Pid) ->
+    LockId = {rebalance_queues, Pid},
+    case global:set_lock(LockId, [node() | nodes()], 0) of
+        true  -> {true, LockId};
+        false -> false
+    end.
+
+-spec rebalance('all' | 'quorum' | 'classic', binary(), binary()) ->
+                       {ok, [{node(), pos_integer()}]} | {error, term()}.
+%% Spread queue masters/leaders across the running nodes, considering
+%% only replicated queues whose type, vhost and name match the specs.
+rebalance(Type, VhostSpec, QueueSpec) ->
+    %% We have not yet acquired the rebalance_queues global lock.
+    maybe_rebalance(get_rebalance_lock(self()), Type, VhostSpec, QueueSpec).
+
+%% With the lock held: compute the per-node ceiling and iterate until
+%% balanced, releasing the lock afterwards. Without it: refuse.
+maybe_rebalance({true, Id}, Type, VhostSpec, QueueSpec) ->
+    rabbit_log:info("Starting queue rebalance operation: '~s' for vhosts matching '~s' and queues matching '~s'",
+                    [Type, VhostSpec, QueueSpec]),
+    Running = rabbit_nodes:all_running(),
+    NumRunning = length(Running),
+    ToRebalance = [Q || Q <- rabbit_amqqueue:list(),
+                        filter_per_type(Type, Q),
+                        is_replicated(Q),
+                        is_match(amqqueue:get_vhost(Q), VhostSpec) andalso
+                            is_match(get_resource_name(amqqueue:get_name(Q)), QueueSpec)],
+    NumToRebalance = length(ToRebalance),
+    ByNode = group_by_node(ToRebalance),
+    %% Allow one extra queue per node when the split is not exact.
+    Rem = case (NumToRebalance rem NumRunning) of
+              0 -> 0;
+              _ -> 1
+          end,
+    MaxQueuesDesired = (NumToRebalance div NumRunning) + Rem,
+    Result = iterative_rebalance(ByNode, MaxQueuesDesired),
+    global:del_lock(Id),
+    rabbit_log:info("Finished queue rebalance operation"),
+    Result;
+maybe_rebalance(false, _Type, _VhostSpec, _QueueSpec) ->
+    rabbit_log:warning("Queue rebalance operation is in progress, please wait."),
+    {error, rebalance_in_progress}.
+
+%% Queue-type filter used by rebalance/3.
+filter_per_type(all, _) ->
+    true;
+filter_per_type(quorum, Q) ->
+    ?amqqueue_is_quorum(Q);
+filter_per_type(classic, Q) ->
+    ?amqqueue_is_classic(Q).
+
+%% Module providing the migration primitives for the queue's type.
+rebalance_module(Q) when ?amqqueue_is_quorum(Q) ->
+    rabbit_quorum_queue;
+rebalance_module(Q) when ?amqqueue_is_classic(Q) ->
+    rabbit_mirror_queue_misc.
+
+%% Bare name field of a #resource{}.
+get_resource_name(#resource{name = Name}) ->
+    Name.
+
+%% True when regular expression E matches anywhere in Subj.
+is_match(Subj, E) ->
+    case re:run(Subj, E) of
+        nomatch -> false;
+        _Match  -> true
+    end.
+
+%% Keep migrating one queue per pass until every node is at or below
+%% MaxQueuesDesired, then return the per-node summary.
+iterative_rebalance(ByNode, MaxQueuesDesired) ->
+    case maybe_migrate(ByNode, MaxQueuesDesired) of
+        {ok, Summary} ->
+            rabbit_log:info("All queue masters are balanced"),
+            {ok, Summary};
+        {migrated, Other} ->
+            iterative_rebalance(Other, MaxQueuesDesired);
+        {not_migrated, Other} ->
+            iterative_rebalance(Other, MaxQueuesDesired)
+    end.
+
+maybe_migrate(ByNode, MaxQueuesDesired) ->
+    maybe_migrate(ByNode, MaxQueuesDesired, maps:keys(ByNode)).
+
+%% Walk the candidate nodes. Entries are {QueueLength, Q, Migrated};
+%% the first over-loaded node has one untouched ({_, Q, false}) queue
+%% migrated to the replica node currently holding the fewest queues.
+maybe_migrate(ByNode, _, []) ->
+    %% No candidates left: build the summary rows per node.
+    {ok, maps:fold(fun(K, V, Acc) ->
+                           {CQs, QQs} = lists:partition(fun({_, Q, _}) ->
+                                                                ?amqqueue_is_classic(Q)
+                                                        end, V),
+                           [[{<<"Node name">>, K}, {<<"Number of quorum queues">>, length(QQs)},
+                             {<<"Number of classic queues">>, length(CQs)}] | Acc]
+                   end, [], ByNode)};
+maybe_migrate(ByNode, MaxQueuesDesired, [N | Nodes]) ->
+    case maps:get(N, ByNode, []) of
+        [{_, Q, false} = Queue | Queues] = All when length(All) > MaxQueuesDesired ->
+            Name = amqqueue:get_name(Q),
+            Module = rebalance_module(Q),
+            OtherNodes = Module:get_replicas(Q) -- [N],
+            case OtherNodes of
+                [] ->
+                    %% No replica elsewhere: nothing to migrate to.
+                    {not_migrated, update_not_migrated_queue(N, Queue, Queues, ByNode)};
+                _ ->
+                    [{Length, Destination} | _] = sort_by_number_of_queues(OtherNodes, ByNode),
+                    rabbit_log:warning("Migrating queue ~p from node ~p with ~p queues to node ~p with ~p queues",
+                                       [Name, N, length(All), Destination, Length]),
+                    case Module:transfer_leadership(Q, Destination) of
+                        {migrated, NewNode} ->
+                            rabbit_log:warning("Queue ~p migrated to ~p", [Name, NewNode]),
+                            {migrated, update_migrated_queue(Destination, N, Queue, Queues, ByNode)};
+                        {not_migrated, Reason} ->
+                            rabbit_log:warning("Error migrating queue ~p: ~p", [Name, Reason]),
+                            {not_migrated, update_not_migrated_queue(N, Queue, Queues, ByNode)}
+                    end
+            end;
+        [{_, _, true} | _] = All when length(All) > MaxQueuesDesired ->
+            rabbit_log:warning("Node ~p contains ~p queues, but all have already migrated. "
+                               "Do nothing", [N, length(All)]),
+            maybe_migrate(ByNode, MaxQueuesDesired, Nodes);
+        All ->
+            rabbit_log:warning("Node ~p only contains ~p queues, do nothing",
+                               [N, length(All)]),
+            maybe_migrate(ByNode, MaxQueuesDesired, Nodes)
+    end.
+
+%% Mark the queue as handled (third element 'true') and requeue it at
+%% the back of node N's list so it is not retried this round.
+update_not_migrated_queue(N, {Entries, Q, _}, Queues, ByNode) ->
+    maps:update(N, Queues ++ [{Entries, Q, true}], ByNode).
+
+%% Move the (now marked) queue entry from OldNode's list to NewNode's.
+update_migrated_queue(NewNode, OldNode, {Entries, Q, _}, Queues, ByNode) ->
+    maps:update_with(NewNode,
+                     fun(L) -> L ++ [{Entries, Q, true}] end,
+                     [{Entries, Q, true}], maps:update(OldNode, Queues, ByNode)).
+
+%% Candidate nodes ordered by how many queues they already hold.
+sort_by_number_of_queues(Nodes, ByNode) ->
+    lists:keysort(1,
+                  lists:map(fun(Node) ->
+                                    {num_queues(Node, ByNode), Node}
+                            end, Nodes)).
+
+%% Number of queue entries currently mapped to Node (0 when absent).
+num_queues(Node, ByNode) ->
+    NodeQueues = maps:get(Node, ByNode, []),
+    length(NodeQueues).
+
+%% Build the working map node => [{QueueLength, Q, false}], each list
+%% sorted by ascending length; 'false' marks a not-yet-migrated queue.
+group_by_node(Queues) ->
+    ByNode = lists:foldl(fun(Q, Acc) ->
+                                 Module = rebalance_module(Q),
+                                 Length = Module:queue_length(Q),
+                                 maps:update_with(amqqueue:qnode(Q),
+                                                  fun(L) -> [{Length, Q, false} | L] end,
+                                                  [{Length, Q, false}], Acc)
+                         end, #{}, Queues),
+    maps:map(fun(_K, V) -> lists:keysort(1, V) end, ByNode).
+
+-spec with(name(),
+           qfun(A),
+           fun((not_found_or_absent()) -> rabbit_types:channel_exit())) ->
+    A | rabbit_types:channel_exit().
+
+%% Apply F to the queue record of Name, retrying (starting with 2000
+%% attempts, see retry_wait/4) while a mirror might be taking over;
+%% E receives the not-found/absent reason when we give up.
+with(Name, F, E) ->
+    with(Name, F, E, 2000).
+
+with(#resource{} = Name, F, E, RetriesLeft) ->
+    case lookup(Name) of
+        {ok, Q} when ?amqqueue_state_is(Q, live) andalso RetriesLeft =:= 0 ->
+            %% Something bad happened to that queue, we are bailing out
+            %% on processing current request.
+            E({absent, Q, timeout});
+        {ok, Q} when ?amqqueue_state_is(Q, stopped) andalso RetriesLeft =:= 0 ->
+            %% The queue was stopped and not migrated
+            E({absent, Q, stopped});
+        %% The queue process has crashed with unknown error
+        {ok, Q} when ?amqqueue_state_is(Q, crashed) ->
+            E({absent, Q, crashed});
+        %% The queue process has been stopped by a supervisor.
+        %% In that case a synchronised mirror can take over
+        %% so we should retry.
+        {ok, Q} when ?amqqueue_state_is(Q, stopped) ->
+            %% The queue process was stopped by the supervisor
+            rabbit_misc:with_exit_handler(
+              fun () -> retry_wait(Q, F, E, RetriesLeft) end,
+              fun () -> F(Q) end);
+        %% The queue is supposed to be active.
+        %% The master node can go away or queue can be killed
+        %% so we retry, waiting for a mirror to take over.
+        {ok, Q} when ?amqqueue_state_is(Q, live) ->
+            %% We check is_process_alive(QPid) in case we receive a
+            %% nodedown (for example) in F() that has nothing to do
+            %% with the QPid. F() should be written s.t. that this
+            %% cannot happen, so we bail if it does since that
+            %% indicates a code bug and we don't want to get stuck in
+            %% the retry loop.
+            rabbit_misc:with_exit_handler(
+              fun () -> retry_wait(Q, F, E, RetriesLeft) end,
+              fun () -> F(Q) end);
+        {error, not_found} ->
+            E(not_found_or_absent_dirty(Name))
+    end.
+
+-spec retry_wait(amqqueue:amqqueue(),
+                 qfun(A),
+                 fun((not_found_or_absent()) -> rabbit_types:channel_exit()),
+                 non_neg_integer()) ->
+    A | rabbit_types:channel_exit().
+
+%% Back off for 30ms and retry with/4, except when the queue is
+%% stopped with no mirrors to fail over to (report absent at once).
+retry_wait(Q, F, E, RetriesLeft) ->
+    Name = amqqueue:get_name(Q),
+    QPid = amqqueue:get_pid(Q),
+    QState = amqqueue:get_state(Q),
+    case {QState, is_replicated(Q)} of
+        %% We don't want to repeat an operation if
+        %% there are no mirrors to migrate to
+        {stopped, false} ->
+            E({absent, Q, stopped});
+        _ ->
+            case rabbit_mnesia:is_process_alive(QPid) of
+                true ->
+                    % rabbitmq-server#1682
+                    % The old check would have crashed here,
+                    % instead, log it and run the exit fun. absent & alive is weird,
+                    % but better than crashing with badmatch,true
+                    rabbit_log:debug("Unexpected alive queue process ~p~n", [QPid]),
+                    E({absent, Q, alive});
+                false ->
+                    ok % Expected result
+            end,
+            timer:sleep(30),
+            with(Name, F, E, RetriesLeft - 1)
+    end.
+
+-spec with(name(), qfun(A)) ->
+    A | rabbit_types:error(not_found_or_absent()).
+
+%% As with/3, but returning {error, Reason} instead of exiting.
+with(Name, F) -> with(Name, F, fun (E) -> {error, E} end).
+
+-spec with_or_die(name(), qfun(A)) -> A | rabbit_types:channel_exit().
+
+%% As with/3, raising a channel protocol error on failure.
+with_or_die(Name, F) ->
+    with(Name, F, die_fun(Name)).
+
+-spec die_fun(name()) ->
+    fun((not_found_or_absent()) -> rabbit_types:channel_exit()).
+
+%% Error continuation that converts a not-found/absent reason into the
+%% corresponding protocol error.
+die_fun(Name) ->
+    fun (not_found)           -> not_found(Name);
+        ({absent, Q, Reason}) -> absent(Q, Reason)
+    end.
+
+-spec not_found(name()) -> rabbit_types:channel_exit().
+
+%% Raise the AMQP not_found channel error for the resource.
+not_found(R) -> rabbit_misc:protocol_error(not_found, "no ~s", [rabbit_misc:rs(R)]).
+
+-spec absent(amqqueue:amqqueue(), absent_reason()) ->
+    rabbit_types:channel_exit().
+
+%% Raise the protocol error appropriate for an absent queue.
+absent(Q, AbsentReason) ->
+    QueueName = amqqueue:get_name(Q),
+    QPid = amqqueue:get_pid(Q),
+    IsDurable = amqqueue:is_durable(Q),
+    priv_absent(QueueName, QPid, IsDurable, AbsentReason).
+
+-spec priv_absent(name(), pid(), boolean(), absent_reason()) ->
+    rabbit_types:channel_exit().
+
+%% One clause per absent reason; each raises not_found with a
+%% reason-specific message.
+priv_absent(QueueName, QPid, true, nodedown) ->
+    %% The assertion of durability is mainly there because we mention
+    %% durability in the error message. That way we will hopefully
+    %% notice if at some future point our logic changes s.t. we get
+    %% here with non-durable queues.
+    rabbit_misc:protocol_error(
+      not_found,
+      "home node '~s' of durable ~s is down or inaccessible",
+      [node(QPid), rabbit_misc:rs(QueueName)]);
+
+priv_absent(QueueName, _QPid, _IsDurable, stopped) ->
+    rabbit_misc:protocol_error(
+      not_found,
+      "~s process is stopped by supervisor", [rabbit_misc:rs(QueueName)]);
+
+priv_absent(QueueName, _QPid, _IsDurable, crashed) ->
+    rabbit_misc:protocol_error(
+      not_found,
+      "~s has crashed and failed to restart", [rabbit_misc:rs(QueueName)]);
+
+priv_absent(QueueName, _QPid, _IsDurable, timeout) ->
+    rabbit_misc:protocol_error(
+      not_found,
+      "failed to perform operation on ~s due to timeout", [rabbit_misc:rs(QueueName)]);
+
+priv_absent(QueueName, QPid, _IsDurable, alive) ->
+    rabbit_misc:protocol_error(
+      not_found,
+      "failed to perform operation on ~s: its master replica ~w may be stopping or being demoted",
+      [rabbit_misc:rs(QueueName), QPid]).
+
+-spec assert_equivalence
+        (amqqueue:amqqueue(), boolean(), boolean(),
+         rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) ->
+            'ok' | rabbit_types:channel_exit() | rabbit_types:connection_exit().
+
+%% Fail with a protocol error unless the redeclared properties match
+%% the existing queue: exclusivity (strict), durable, auto_delete and
+%% the validated x-arguments.
+assert_equivalence(Q, DurableDeclare, AutoDeleteDeclare, Args1, Owner) ->
+    QName = amqqueue:get_name(Q),
+    DurableQ = amqqueue:is_durable(Q),
+    AutoDeleteQ = amqqueue:is_auto_delete(Q),
+    ok = check_exclusive_access(Q, Owner, strict),
+    ok = rabbit_misc:assert_field_equivalence(DurableQ, DurableDeclare, QName, durable),
+    ok = rabbit_misc:assert_field_equivalence(AutoDeleteQ, AutoDeleteDeclare, QName, auto_delete),
+    ok = assert_args_equivalence(Q, Args1).
+
+-spec check_exclusive_access(amqqueue:amqqueue(), pid()) ->
+    'ok' | rabbit_types:channel_exit().
+
+check_exclusive_access(Q, Owner) -> check_exclusive_access(Q, Owner, lax).
+
+%% 'lax' also admits non-exclusive queues; in both modes an owner
+%% match is accepted, anything else raises resource_locked.
+check_exclusive_access(Q, Owner, _MatchType)
+  when ?amqqueue_exclusive_owner_is(Q, Owner) ->
+    ok;
+check_exclusive_access(Q, _ReaderPid, lax)
+  when ?amqqueue_exclusive_owner_is(Q, none) ->
+    ok;
+check_exclusive_access(Q, _ReaderPid, _MatchType) ->
+    QueueName = amqqueue:get_name(Q),
+    rabbit_misc:protocol_error(
+      resource_locked,
+      "cannot obtain exclusive access to locked ~s. It could be originally "
+      "declared on another connection or the exclusive property value does not "
+      "match that of the original declaration.",
+      [rabbit_misc:rs(QueueName)]).
+
+-spec with_exclusive_access_or_die(name(), pid(), qfun(A)) ->
+    A | rabbit_types:channel_exit().
+
+%% Run F on the queue after asserting (lax) exclusive access.
+with_exclusive_access_or_die(Name, ReaderPid, F) ->
+    with_or_die(Name,
+                fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end).
+
+%% Compare the client's x-arguments against the queue's for the keys
+%% we validate at declare time.
+assert_args_equivalence(Q, RequiredArgs) ->
+    QueueName = amqqueue:get_name(Q),
+    Args = amqqueue:get_arguments(Q),
+    rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName,
+                                        [Key || {Key, _Fun} <- declare_args()]).
+
+%% Validate declare-time x-arguments.
+check_declare_arguments(QueueName, Args) ->
+    check_arguments(QueueName, Args, declare_args()).
+
+%% Validate consume-time x-arguments.
+check_consume_arguments(QueueName, Args) ->
+    check_arguments(QueueName, Args, consume_args()).
+
+%% Run each validator against its argument; absent arguments are fine,
+%% a failed validator raises precondition_failed.
+check_arguments(QueueName, Args, Validators) ->
+    [case rabbit_misc:table_lookup(Args, Key) of
+         undefined -> ok;
+         TypeVal   -> case Fun(TypeVal, Args) of
+                          ok             -> ok;
+                          {error, Error} -> rabbit_misc:protocol_error(
+                                              precondition_failed,
+                                              "invalid arg '~s' for ~s: ~255p",
+                                              [Key, rabbit_misc:rs(QueueName),
+                                               Error])
+                      end
+     end || {Key, Fun} <- Validators],
+    ok.
+
+%% Declare-time x-argument validators, keyed by argument name.
+declare_args() ->
+    [{<<"x-expires">>,                 fun check_expires_arg/2},
+     {<<"x-message-ttl">>,             fun check_message_ttl_arg/2},
+     {<<"x-dead-letter-exchange">>,    fun check_dlxname_arg/2},
+     {<<"x-dead-letter-routing-key">>, fun check_dlxrk_arg/2},
+     {<<"x-max-length">>,              fun check_non_neg_int_arg/2},
+     {<<"x-max-length-bytes">>,        fun check_non_neg_int_arg/2},
+     {<<"x-max-in-memory-length">>,    fun check_non_neg_int_arg/2},
+     {<<"x-max-in-memory-bytes">>,     fun check_non_neg_int_arg/2},
+     {<<"x-max-priority">>,            fun check_max_priority_arg/2},
+     {<<"x-overflow">>,                fun check_overflow/2},
+     {<<"x-queue-mode">>,              fun check_queue_mode/2},
+     {<<"x-single-active-consumer">>,  fun check_single_active_consumer_arg/2},
+     {<<"x-queue-type">>,              fun check_queue_type/2},
+     {<<"x-quorum-initial-group-size">>, fun check_initial_cluster_size_arg/2},
+     {<<"x-max-age">>,                 fun check_max_age_arg/2},
+     {<<"x-max-segment-size">>,        fun check_non_neg_int_arg/2},
+     {<<"x-initial-cluster-size">>,    fun check_initial_cluster_size_arg/2},
+     {<<"x-queue-leader-locator">>,    fun check_queue_leader_locator_arg/2}].
+
+%% Consume-time x-argument validators.
+consume_args() -> [{<<"x-priority">>,              fun check_int_arg/2},
+                   {<<"x-cancel-on-ha-failover">>, fun check_bool_arg/2}].
+
+%% Accept any AMQP 0-9-1 integer field type (?INTEGER_ARG_TYPES).
+check_int_arg({Type, _}, _) ->
+    case lists:member(Type, ?INTEGER_ARG_TYPES) of
+        true  -> ok;
+        false -> {error, {unacceptable_type, Type}}
+    end.
+
+%% The argument must be an AMQP boolean field.
+check_bool_arg({bool, _Value}, _Args) ->
+    ok;
+check_bool_arg({OtherType, _Value}, _Args) ->
+    {error, {unacceptable_type, OtherType}}.
+
+%% Integer field with value >= 0.
+check_non_neg_int_arg({Type, Val}, Args) ->
+    case check_int_arg({Type, Val}, Args) of
+        ok when Val >= 0 -> ok;
+        ok               -> {error, {value_negative, Val}};
+        Error            -> Error
+    end.
+
+%% Queue expiry: integer > 0; range checked by rabbit_misc:check_expiry/1.
+check_expires_arg({Type, Val}, Args) ->
+    case check_int_arg({Type, Val}, Args) of
+        ok when Val == 0 -> {error, {value_zero, Val}};
+        ok               -> rabbit_misc:check_expiry(Val);
+        Error            -> Error
+    end.
+
+%% Message TTL: integer; range checked by rabbit_misc:check_expiry/1.
+check_message_ttl_arg({Type, Val}, Args) ->
+    case check_int_arg({Type, Val}, Args) of
+        ok    -> rabbit_misc:check_expiry(Val);
+        Error -> Error
+    end.
+
+%% Priority levels: integer in 0..?MAX_SUPPORTED_PRIORITY.
+check_max_priority_arg({Type, Val}, Args) ->
+    case check_non_neg_int_arg({Type, Val}, Args) of
+        ok when Val =< ?MAX_SUPPORTED_PRIORITY -> ok;
+        ok                                     -> {error, {max_value_exceeded, Val}};
+        Error                                  -> Error
+    end.
+
+%% x-single-active-consumer must be a boolean. The previous
+%% case-of-check_bool_arg wrapper returned its result unchanged in
+%% both branches (a no-op), so delegate directly.
+check_single_active_consumer_arg({Type, Val}, Args) ->
+    check_bool_arg({Type, Val}, Args).
+
+%% Initial cluster/group size: integer > 0.
+check_initial_cluster_size_arg({Type, Val}, Args) ->
+    case check_non_neg_int_arg({Type, Val}, Args) of
+        ok when Val == 0 -> {error, {value_zero, Val}};
+        ok               -> ok;
+        Error            -> Error
+    end.
+
+%% x-max-age must be a string (e.g. <<"7D">>); the actual parsing and
+%% validation lives in check_max_age/1.
+check_max_age_arg({longstr, Val}, _Args) ->
+    case check_max_age(Val) of
+        {error, _} = E ->
+            E;
+        _ ->
+            ok
+    end;
+check_max_age_arg({Type, _}, _Args) ->
+    {error, {unacceptable_type, Type}}.
+
+%% Parse a max-age value such as "7D" or "3600s" into milliseconds;
+%% returns {error, invalid_max_age} for anything else. Units:
+%% Y(ears), M(onths), D(ays), h(ours), m(inutes), s(econds).
+check_max_age(MaxAge) ->
+    %% "[0-9]+" (previously "*"): with "*" an input without leading
+    %% digits matched an empty Value, and list_to_integer("") crashed
+    %% with badarg instead of returning {error, invalid_max_age}.
+    case re:run(MaxAge, "(^[0-9]+)(.*)", [{capture, all_but_first, list}]) of
+        {match, [Value, Unit]} ->
+            case list_to_integer(Value) of
+                I when I > 0 ->
+                    case lists:member(Unit, ["Y", "M", "D", "h", "m", "s"]) of
+                        true ->
+                            %% reuse I instead of re-parsing Value
+                            I * unit_value_in_ms(Unit);
+                        false ->
+                            {error, invalid_max_age}
+                    end;
+                _ ->
+                    {error, invalid_max_age}
+            end;
+        _ ->
+            {error, invalid_max_age}
+    end.
+
+%% Milliseconds per max-age unit; calendar-style approximations
+%% (month = 30 days, year = 365 days). Base case first.
+unit_value_in_ms("s") ->
+    1000;
+unit_value_in_ms("m") ->
+    60 * unit_value_in_ms("s");
+unit_value_in_ms("h") ->
+    3600 * unit_value_in_ms("s");
+unit_value_in_ms("D") ->
+    24 * unit_value_in_ms("h");
+unit_value_in_ms("M") ->
+    30 * unit_value_in_ms("D");
+unit_value_in_ms("Y") ->
+    365 * unit_value_in_ms("D").
+
+%% Note that the validity of x-dead-letter-exchange is already verified
+%% by rabbit_channel's queue.declare handler.
+check_dlxname_arg({longstr, _}, _) -> ok;
+check_dlxname_arg({Type, _}, _) -> {error, {unacceptable_type, Type}}.
+
+%% A dead-letter routing key only makes sense when a dead-letter
+%% exchange is also configured.
+check_dlxrk_arg({longstr, _}, Args) ->
+    case rabbit_misc:table_lookup(Args, <<"x-dead-letter-exchange">>) of
+        undefined -> {error, routing_key_but_no_dlx_defined};
+        _         -> ok
+    end;
+check_dlxrk_arg({Type, _}, _Args) ->
+    {error, {unacceptable_type, Type}}.
+
+%% x-overflow must be one of the known overflow strategy names.
+check_overflow({longstr, Val}, _Args) ->
+    Allowed = [<<"drop-head">>,
+               <<"reject-publish">>,
+               <<"reject-publish-dlx">>],
+    case lists:member(Val, Allowed) of
+        true  -> ok;
+        false -> {error, invalid_overflow}
+    end;
+check_overflow({Type, _Val}, _Args) ->
+    {error, {unacceptable_type, Type}}.
+
+%% x-queue-leader-locator: strategy used to place a new queue's leader.
+check_queue_leader_locator_arg({longstr, Val}, _Args) ->
+    case lists:member(Val, [<<"client-local">>,
+                            <<"random">>,
+                            <<"least-leaders">>]) of
+        true  -> ok;
+        false -> {error, invalid_queue_locator_arg}
+    end;
+check_queue_leader_locator_arg({Type, _}, _Args) ->
+    {error, {unacceptable_type, Type}}.
+
+%% x-queue-mode: classic queue mode, default or lazy.
+check_queue_mode({longstr, Val}, _Args) ->
+    case lists:member(Val, [<<"default">>, <<"lazy">>]) of
+        true  -> ok;
+        false -> {error, invalid_queue_mode}
+    end;
+check_queue_mode({Type, _}, _Args) ->
+    {error, {unacceptable_type, Type}}.
+
+%% x-queue-type: one of the supported queue type names.
+check_queue_type({longstr, Val}, _Args) ->
+    case lists:member(Val, [<<"classic">>, <<"quorum">>, <<"stream">>]) of
+        true  -> ok;
+        false -> {error, invalid_queue_type}
+    end;
+check_queue_type({Type, _}, _Args) ->
+    {error, {unacceptable_type, Type}}.
+
+-spec list() -> [amqqueue:amqqueue()].
+
+%% All queues across all vhosts (dirty read with a retry wrapper).
+list() ->
+    list_with_possible_retry(fun do_list/0).
+
+do_list() ->
+    mnesia:dirty_match_object(rabbit_queue, amqqueue:pattern_match_all()).
+
+-spec count() -> non_neg_integer().
+
+%% Total number of queues (table size; cheap).
+count() ->
+    mnesia:table_info(rabbit_queue, size).
+
+-spec list_names() -> [rabbit_amqqueue:name()].
+
+list_names() -> mnesia:dirty_all_keys(rabbit_queue).
+
+%% Names of all queues in VHost.
+list_names(VHost) -> [amqqueue:get_name(Q) || Q <- list(VHost)].
+
+%% Names of non-crashed queues whose pid lives on this node.
+list_local_names() ->
+    [ amqqueue:get_name(Q) || Q <- list(),
+           amqqueue:get_state(Q) =/= crashed, is_local_to_node(amqqueue:get_pid(Q), node())].
+
+%% Names of local queues that report state 'down' (or cannot be
+%% queried at all).
+list_local_names_down() ->
+    [ amqqueue:get_name(Q) || Q <- list(),
+                              is_down(Q),
+                              is_local_to_node(amqqueue:get_pid(Q), node())].
+
+%% Best effort: a queue whose state cannot be obtained at all is
+%% treated as down, hence the deliberately broad catch.
+is_down(Q) ->
+    try
+        info(Q, [state]) == [{state, down}]
+    catch
+        _:_ ->
+            true
+    end.
+
+
+-spec sample_local_queues() -> [amqqueue:amqqueue()].
+%% A random sample (at most 300 picks, deduplicated) of queues local to
+%% this node. Used for statistics-style reporting, not exhaustive listing.
+sample_local_queues() -> sample_n_by_name(list_local_names(), 300).
+
+-spec sample_n_by_name([rabbit_amqqueue:name()], pos_integer()) -> [amqqueue:amqqueue()].
+%% Looks up a random sample of up to N queues from the given names.
+%% Sampling is with replacement and the picks are deduplicated, so fewer
+%% than N queues may be returned.
+sample_n_by_name([], _N) ->
+    [];
+sample_n_by_name(Names, N) when is_list(Names) andalso is_integer(N) andalso N > 0 ->
+    %% lists:nth/2 throws when position is > list length
+    M = erlang:min(N, length(Names)),
+    %% Fix: the old fold capped the accumulator at a hard-coded 100 picks,
+    %% silently ignoring any larger N (sample_local_queues/0 asks for 300).
+    %% Honour the caller-requested sample size instead.
+    %% NOTE(review): as before, picks are drawn from the first M names
+    %% only, not uniformly from all of Names - confirm this is intended.
+    Ids = [lists:nth(rand:uniform(M), Names) || _ <- lists:seq(1, M)],
+    lists:map(fun (Id) ->
+                      {ok, Q} = rabbit_amqqueue:lookup(Id),
+                      Q
+              end,
+              lists:usort(Ids)).
+
+-spec sample_n([amqqueue:amqqueue()], pos_integer()) -> [amqqueue:amqqueue()].
+%% Samples up to N queues from the given queue records by delegating to
+%% sample_n_by_name/2 on their resource names.
+sample_n([], _N) ->
+    [];
+sample_n(Queues, N) when is_list(Queues) andalso is_integer(N) andalso N > 0 ->
+    sample_n_by_name(lists:map(fun amqqueue:get_name/1, Queues), N).
+
+
+-spec list_by_type(atom()) -> [amqqueue:amqqueue()].
+
+%% Lists durable queues of the given queue-type module. The 'classic' and
+%% 'quorum' shorthands map to their implementing modules.
+list_by_type(classic) -> list_by_type(rabbit_classic_queue);
+list_by_type(quorum)  -> list_by_type(rabbit_quorum_queue);
+list_by_type(Type) ->
+    {atomic, Qs} =
+        mnesia:sync_transaction(
+          fun () ->
+                  mnesia:match_object(rabbit_durable_queue,
+                                      amqqueue:pattern_match_on_type(Type),
+                                      read)
+          end),
+    Qs.
+
+-spec list_local_quorum_queue_names() -> [rabbit_amqqueue:name()].
+
+%% Names of non-crashed quorum queues that have a member on this node.
+list_local_quorum_queue_names() ->
+    [ amqqueue:get_name(Q) || Q <- list_by_type(quorum),
+                              amqqueue:get_state(Q) =/= crashed,
+                              lists:member(node(), get_quorum_nodes(Q))].
+
+-spec list_local_quorum_queues() -> [amqqueue:amqqueue()].
+%% Non-crashed quorum queues that have a member on this node.
+list_local_quorum_queues() ->
+    [ Q || Q <- list_by_type(quorum),
+           amqqueue:get_state(Q) =/= crashed,
+           lists:member(node(), get_quorum_nodes(Q))].
+
+-spec list_local_leaders() -> [amqqueue:amqqueue()].
+%% Quorum queues whose leader is on this node.
+list_local_leaders() ->
+    [ Q || Q <- list(),
+           amqqueue:is_quorum(Q),
+           amqqueue:get_state(Q) =/= crashed, amqqueue:get_leader(Q) =:= node()].
+
+-spec list_local_followers() -> [amqqueue:amqqueue()].
+%% Quorum queues with a recoverable (follower) member on this node whose
+%% leader is elsewhere.
+list_local_followers() ->
+    [Q
+     || Q <- list(),
+        amqqueue:is_quorum(Q),
+        amqqueue:get_state(Q) =/= crashed,
+        amqqueue:get_leader(Q) =/= node(),
+        rabbit_quorum_queue:is_recoverable(Q)
+    ].
+
+-spec list_local_mirrored_classic_queues() -> [amqqueue:amqqueue()].
+%% Mirrored classic queues whose master process runs on this node.
+list_local_mirrored_classic_queues() ->
+    [ Q || Q <- list(),
+           amqqueue:get_state(Q) =/= crashed,
+           amqqueue:is_classic(Q),
+           is_local_to_node(amqqueue:get_pid(Q), node()),
+           is_replicated(Q)].
+
+-spec list_local_mirrored_classic_names() -> [rabbit_amqqueue:name()].
+%% Names of mirrored classic queues whose master runs on this node.
+list_local_mirrored_classic_names() ->
+    [ amqqueue:get_name(Q) || Q <- list(),
+                              amqqueue:get_state(Q) =/= crashed,
+                              amqqueue:is_classic(Q),
+                              is_local_to_node(amqqueue:get_pid(Q), node()),
+                              is_replicated(Q)].
+
+-spec list_local_mirrored_classic_without_synchronised_mirrors() ->
+    [amqqueue:amqqueue()].
+%% Local mirrored classic queues that currently have no synchronised
+%% mirror online - i.e. queues that would lose data if this node stopped.
+list_local_mirrored_classic_without_synchronised_mirrors() ->
+    [ Q || Q <- list(),
+           amqqueue:get_state(Q) =/= crashed,
+           amqqueue:is_classic(Q),
+           %% filter out exclusive queues as they won't actually be mirrored
+           is_not_exclusive(Q),
+           is_local_to_node(amqqueue:get_pid(Q), node()),
+           is_replicated(Q),
+           not has_synchronised_mirrors_online(Q)].
+
+-spec list_local_mirrored_classic_without_synchronised_mirrors_for_cli() ->
+    [#{binary => any()}].
+%% CLI-friendly rendering of the queues above: one map per queue with
+%% binary keys so it serialises cleanly.
+list_local_mirrored_classic_without_synchronised_mirrors_for_cli() ->
+    ClassicQs = list_local_mirrored_classic_without_synchronised_mirrors(),
+    [begin
+         #resource{name = Name} = amqqueue:get_name(Q),
+         #{
+             <<"readable_name">> => rabbit_data_coercion:to_binary(rabbit_misc:rs(amqqueue:get_name(Q))),
+             <<"name">> => Name,
+             <<"virtual_host">> => amqqueue:get_vhost(Q),
+             <<"type">> => <<"classic">>
+         }
+     end || Q <- ClassicQs].
+
+%% Classic queues are local when their pid lives on Node; quorum queues
+%% are identified by {Name, LeaderNode} tuples instead of a pid.
+is_local_to_node(QPid, Node) when ?IS_CLASSIC(QPid) ->
+    Node =:= node(QPid);
+is_local_to_node({_, Leader} = QPid, Node) when ?IS_QUORUM(QPid) ->
+    Node =:= Leader.
+
+-spec list(rabbit_types:vhost()) -> [amqqueue:amqqueue()].
+
+%% Lists the (running) queues of a vhost.
+list(VHostPath) ->
+    list(VHostPath, rabbit_queue).
+
+%% Lists the queues of a vhost from the given table (rabbit_queue or
+%% rabbit_durable_queue), with the record-migration retry.
+list(VHostPath, TableName) ->
+    list_with_possible_retry(fun() -> do_list(VHostPath, TableName) end).
+
+%% Not dirty_match_object since that would not be transactional when used in a
+%% tx context
+do_list(VHostPath, TableName) ->
+    mnesia:async_dirty(
+      fun () ->
+              mnesia:match_object(
+                TableName,
+                amqqueue:pattern_match_on_name(rabbit_misc:r(VHostPath, queue)),
+                read)
+      end).
+
+%% Runs a listing fun, retrying once if the amqqueue record version
+%% changed while we were reading (see the explanation below).
+list_with_possible_retry(Fun) ->
+    %% amqqueue migration:
+    %% The `rabbit_queue` or `rabbit_durable_queue` tables
+    %% might be migrated between the time we query the pattern
+    %% (with the `amqqueue` module) and the time we call
+    %% `mnesia:dirty_match_object()`. This would lead to an empty list
+    %% (no object matching the now incorrect pattern), not a Mnesia
+    %% error.
+    %%
+    %% So if the result is an empty list and the version of the
+    %% `amqqueue` record changed in between, we retry the operation.
+    %%
+    %% However, we don't do this if inside a Mnesia transaction: we
+    %% could end up with a live lock between this started transaction
+    %% and the Mnesia table migration which is blocked (but the
+    %% rabbit_feature_flags lock is held).
+    AmqqueueRecordVersion = amqqueue:record_version_to_use(),
+    case Fun() of
+        [] ->
+            case mnesia:is_transaction() of
+                true ->
+                    [];
+                false ->
+                    case amqqueue:record_version_to_use() of
+                        AmqqueueRecordVersion -> [];
+                        _                     -> Fun()
+                    end
+            end;
+        Ret ->
+            Ret
+    end.
+
+-spec list_down(rabbit_types:vhost()) -> [amqqueue:amqqueue()].
+
+%% Queues of the vhost that are durable but not currently present (i.e.
+%% recorded in rabbit_durable_queue but absent from rabbit_queue).
+list_down(VHostPath) ->
+    case rabbit_vhost:exists(VHostPath) of
+        false -> [];
+        true  ->
+            Present = list(VHostPath),
+            Durable = list(VHostPath, rabbit_durable_queue),
+            PresentS = sets:from_list([amqqueue:get_name(Q) || Q <- Present]),
+            sets:to_list(sets:filter(fun (Q) ->
+                                             N = amqqueue:get_name(Q),
+                                             not sets:is_element(N, PresentS)
+                                     end, sets:from_list(Durable)))
+    end.
+
+%% Number of queues in a vhost; returns 0 (and logs) on any failure.
+count(VHost) ->
+  try
+    %% this is certainly suboptimal but there is no way to count
+    %% things using a secondary index in Mnesia. Our counter-table-per-node
+    %% won't work here because with master migration of mirrored queues
+    %% the "ownership" of queues by nodes becomes a non-trivial problem
+    %% that requires a proper consensus algorithm.
+    length(list_for_count(VHost))
+  catch _:Err ->
+    rabbit_log:error("Failed to fetch number of queues in vhost ~p:~n~p~n",
+                     [VHost, Err]),
+    0
+  end.
+
+%% All queues of a vhost via the vhost secondary index (dirty read).
+list_for_count(VHost) ->
+    list_with_possible_retry(
+      fun() ->
+              mnesia:dirty_index_read(rabbit_queue,
+                                      VHost,
+                                      amqqueue:field_vhost())
+      end).
+
+-spec info_keys() -> rabbit_types:info_keys().
+
+%% It should no default to classic queue keys, but a subset of those that must be shared
+%% by all queue types. Not sure this is even being used, so will leave it here for backwards
+%% compatibility. Each queue type handles now info(Q, all_keys) with the keys it supports.
+info_keys() -> rabbit_amqqueue_process:info_keys().
+
+%% Maps F over Qs, dropping entries whose evaluation exits (e.g. because
+%% the queue process died mid-iteration).
+map(Qs, F) -> rabbit_misc:filter_exit_map(F, Qs).
+
+%% Probes whether a queue process answers an info request within Timeout.
+%% Crashed queues are reported as responsive (they are known-dead, not
+%% unresponsive).
+is_unresponsive(Q, _Timeout) when ?amqqueue_state_is(Q, crashed) ->
+    false;
+is_unresponsive(Q, Timeout) when ?amqqueue_is_classic(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    try
+        delegate:invoke(QPid, {gen_server2, call, [{info, [name]}, Timeout]}),
+        false
+    catch
+        %% TODO catch any exit??
+        exit:{timeout, _} ->
+            true
+    end;
+is_unresponsive(Q, Timeout) when ?amqqueue_is_quorum(Q) ->
+    try
+        Leader = amqqueue:get_pid(Q),
+        case rabbit_fifo_client:stat(Leader, Timeout) of
+            {ok, _, _}   -> false;
+            {timeout, _} -> true;
+            {error, _}   -> true
+        end
+    catch
+        exit:{timeout, _} ->
+            true
+    end.
+
+%% Human-readable rendering of a queue record, per queue type.
+format(Q) when ?amqqueue_is_quorum(Q) -> rabbit_quorum_queue:format(Q);
+format(Q)                             -> rabbit_amqqueue_process:format(Q).
+
+-spec info(amqqueue:amqqueue()) -> rabbit_types:infos().
+
+%% Full info proplist for a queue (all keys its queue type supports).
+info(Q) when ?is_amqqueue(Q) -> rabbit_queue_type:info(Q, all_keys).
+
+
+-spec info(amqqueue:amqqueue(), rabbit_types:info_keys()) ->
+          rabbit_types:infos().
+
+%% Info proplist restricted to the requested keys.
+info(Q, Items) when ?is_amqqueue(Q) ->
+    rabbit_queue_type:info(Q, Items).
+
+%% Info for a queue that is down, with the given down reason.
+info_down(Q, DownReason) ->
+    rabbit_queue_type:info_down(Q, DownReason).
+
+info_down(Q, Items, DownReason) ->
+    rabbit_queue_type:info_down(Q, Items, DownReason).
+
+-spec info_all(rabbit_types:vhost()) -> [rabbit_types:infos()].
+
+%% Info for all queues of a vhost: live queues first, then down ones.
+info_all(VHostPath) ->
+    map(list(VHostPath), fun (Q) -> info(Q) end) ++
+        map(list_down(VHostPath), fun (Q) -> info_down(Q, down) end).
+
+-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys()) ->
+          [rabbit_types:infos()].
+
+info_all(VHostPath, Items) ->
+    map(list(VHostPath), fun (Q) -> info(Q, Items) end) ++
+        map(list_down(VHostPath), fun (Q) -> info_down(Q, Items, down) end).
+
+%% Streams info for this node's queues to AggregatorPid (CLI support).
+emit_info_local(VHostPath, Items, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map_with_exit_handler(
+      AggregatorPid, Ref, fun(Q) -> info(Q, Items) end, list_local(VHostPath)).
+
+%% Spawns an emit_info_local/4 worker on each node and awaits them.
+emit_info_all(Nodes, VHostPath, Items, Ref, AggregatorPid) ->
+    Pids = [ spawn_link(Node, rabbit_amqqueue, emit_info_local, [VHostPath, Items, Ref, AggregatorPid]) || Node <- Nodes ],
+    rabbit_control_misc:await_emitters_termination(Pids).
+
+%% Gathers queue info from all running nodes into a single list.
+collect_info_all(VHostPath, Items) ->
+    Nodes = rabbit_nodes:all_running(),
+    Ref = make_ref(),
+    Pids = [ spawn_link(Node, rabbit_amqqueue, emit_info_local, [VHostPath, Items, Ref, self()]) || Node <- Nodes ],
+    rabbit_control_misc:await_emitters_termination(Pids),
+    wait_for_queues(Ref, length(Pids), []).
+
+%% Collects {Ref, Items, continue} messages from N emitters until each has
+%% sent {Ref, finished}; gives up (returning what was gathered) after 1s
+%% of silence.
+wait_for_queues(Ref, N, Acc) ->
+    receive
+        {Ref, finished} ->
+            case N of
+                1 -> Acc;
+                _ -> wait_for_queues(Ref, N - 1, Acc)
+            end;
+        {Ref, Items, continue} ->
+            wait_for_queues(Ref, N, [Items | Acc])
+    after 1000 ->
+            Acc
+    end.
+
+%% Streams info for the vhost's down queues to AggregatorPid.
+emit_info_down(VHostPath, Items, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map_with_exit_handler(
+      AggregatorPid, Ref, fun(Q) -> info_down(Q, Items, down) end,
+      list_down(VHostPath)).
+
+%% Streams info for local queues that fail to answer within Timeout.
+emit_unresponsive_local(VHostPath, Items, Timeout, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map_with_exit_handler(
+      AggregatorPid, Ref, fun(Q) -> case is_unresponsive(Q, Timeout) of
+                                        true -> info_down(Q, Items, unresponsive);
+                                        false -> []
+                                    end
+                          end, list_local(VHostPath)
+     ).
+
+%% Spawns an emit_unresponsive_local/5 worker on each node and awaits them.
+emit_unresponsive(Nodes, VHostPath, Items, Timeout, Ref, AggregatorPid) ->
+    Pids = [ spawn_link(Node, rabbit_amqqueue, emit_unresponsive_local,
+                        [VHostPath, Items, Timeout, Ref, AggregatorPid]) || Node <- Nodes ],
+    rabbit_control_misc:await_emitters_termination(Pids).
+
+%% Name-only info for this node's queues in the vhost.
+info_local(VHostPath) ->
+    map(list_local(VHostPath), fun (Q) -> info(Q, [name]) end).
+
+%% Non-crashed queues in the vhost hosted on (or led from) this node.
+list_local(VHostPath) ->
+    [Q || Q <- list(VHostPath),
+          amqqueue:get_state(Q) =/= crashed, is_local_to_node(amqqueue:get_pid(Q), node())].
+
+-spec force_event_refresh(reference()) -> 'ok'.
+
+% Note: https://www.pivotaltracker.com/story/show/166962656
+% This event is necessary for the stats timer to be initialized with
+% the correct values once the management agent has started
+force_event_refresh(Ref) ->
+    %% note: quorum queuse emit stats on periodic ticks that run unconditionally,
+    %% so force_event_refresh is unnecessary (and, in fact, would only produce log noise) for QQs.
+    ClassicQs = list_by_type(rabbit_classic_queue),
+    [gen_server2:cast(amqqueue:get_pid(Q),
+                      {force_event_refresh, Ref}) || Q <- ClassicQs],
+    ok.
+
+-spec notify_policy_changed(amqqueue:amqqueue()) -> 'ok'.
+%% Tells the queue (via its queue type) that its effective policy changed.
+notify_policy_changed(Q) when ?is_amqqueue(Q) ->
+    rabbit_queue_type:policy_changed(Q).
+
+-spec consumers(amqqueue:amqqueue()) ->
+          [{pid(), rabbit_types:ctag(), boolean(), non_neg_integer(),
+            boolean(), atom(),
+            rabbit_framing:amqp_table(), rabbit_types:username()}].
+
+%% Lists a queue's consumers; implementation differs per queue type.
+consumers(Q) when ?amqqueue_is_classic(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    delegate:invoke(QPid, {gen_server2, call, [consumers, infinity]});
+consumers(Q) when ?amqqueue_is_quorum(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    case ra:local_query(QPid, fun rabbit_fifo:query_consumers/1) of
+        {ok, {_, Result}, _} -> maps:values(Result);
+        _                    -> []
+    end;
+consumers(Q) when ?amqqueue_is_stream(Q) ->
+    %% TODO how??? they only exist on the channel
+    %% we could list the offset listener on the writer but we don't even have a consumer tag,
+    %% only a (channel) pid and offset
+    [].
+
+-spec consumer_info_keys() -> rabbit_types:info_keys().
+
+consumer_info_keys() -> ?CONSUMER_INFO_KEYS.
+
+-spec consumers_all(rabbit_types:vhost()) ->
+          [{name(), pid(), rabbit_types:ctag(), boolean(),
+            non_neg_integer(), rabbit_framing:amqp_table()}].
+
+%% Consumer info for every queue in the vhost, flattened into one list.
+consumers_all(VHostPath) ->
+    ConsumerInfoKeys = consumer_info_keys(),
+    lists:append(
+      map(list(VHostPath),
+          fun(Q) -> get_queue_consumer_info(Q, ConsumerInfoKeys) end)).
+
+%% Spawns an emit_consumers_local/3 worker on each node and awaits them.
+emit_consumers_all(Nodes, VHostPath, Ref, AggregatorPid) ->
+    Pids = [ spawn_link(Node, rabbit_amqqueue, emit_consumers_local, [VHostPath, Ref, AggregatorPid]) || Node <- Nodes ],
+    rabbit_control_misc:await_emitters_termination(Pids),
+    ok.
+
+%% Streams consumer info for this node's queues to AggregatorPid.
+emit_consumers_local(VHostPath, Ref, AggregatorPid) ->
+    ConsumerInfoKeys = consumer_info_keys(),
+    rabbit_control_misc:emitting_map(
+      AggregatorPid, Ref,
+      fun(Q) -> get_queue_consumer_info(Q, ConsumerInfoKeys) end,
+      list_local(VHostPath)).
+
+%% Zips each consumer tuple of Q with the info keys into a proplist.
+get_queue_consumer_info(Q, ConsumerInfoKeys) ->
+    [lists:zip(ConsumerInfoKeys,
+               [amqqueue:get_name(Q), ChPid, CTag,
+                AckRequired, Prefetch, Active, ActivityStatus, Args]) ||
+        {ChPid, CTag, AckRequired, Prefetch, Active, ActivityStatus, Args, _} <- consumers(Q)].
+
+-spec stat(amqqueue:amqqueue()) ->
+          {'ok', non_neg_integer(), non_neg_integer()}.
+%% Returns {ok, MessageCount, ConsumerCount} for the queue.
+stat(Q) ->
+    rabbit_queue_type:stat(Q).
+
+-spec pid_of(amqqueue:amqqueue()) ->
+          pid().
+
+pid_of(Q) -> amqqueue:get_pid(Q).
+
+-spec pid_of(rabbit_types:vhost(), rabbit_misc:resource_name()) ->
+          pid() | rabbit_types:error('not_found').
+
+%% Resolves a vhost + queue name to the queue process pid.
+pid_of(VHost, QueueName) ->
+    case lookup(rabbit_misc:r(VHost, queue, QueueName)) of
+        {ok, Q}                -> pid_of(Q);
+        {error, not_found} = E -> E
+    end.
+
+-spec delete_exclusive(qpids(), pid()) -> 'ok'.
+
+%% Deletes the exclusive queues owned by connection ConnId.
+delete_exclusive(QPids, ConnId) ->
+    rabbit_amqqueue_common:delete_exclusive(QPids, ConnId).
+
+-spec delete_immediately(qpids()) -> 'ok'.
+
+%% Force-deletes classic queues by pid; quorum queues cannot be deleted
+%% this way and are reported back in the error tuple.
+delete_immediately(QPids) ->
+    {Classic, Quorum} = filter_pid_per_type(QPids),
+    [gen_server2:cast(QPid, delete_immediately) || QPid <- Classic],
+    case Quorum of
+        [] -> ok;
+        _  -> {error, cannot_delete_quorum_queues, Quorum}
+    end.
+
+%% Force-deletes queues by resource name; handles both classic and quorum.
+delete_immediately_by_resource(Resources) ->
+    {Classic, Quorum} = filter_resource_per_type(Resources),
+    [gen_server2:cast(QPid, delete_immediately) || {_, QPid} <- Classic],
+    [rabbit_quorum_queue:delete_immediately(Resource, QPid)
+     || {Resource, QPid} <- Quorum],
+    ok.
+
+-spec delete
+        (amqqueue:amqqueue(), 'false', 'false', rabbit_types:username()) ->
+            qlen() |
+            {protocol_error, Type :: atom(), Reason :: string(), Args :: term()};
+        (amqqueue:amqqueue(), 'true' , 'false', rabbit_types:username()) ->
+            qlen() | rabbit_types:error('in_use') |
+            {protocol_error, Type :: atom(), Reason :: string(), Args :: term()};
+        (amqqueue:amqqueue(), 'false', 'true', rabbit_types:username()) ->
+            qlen() | rabbit_types:error('not_empty') |
+            {protocol_error, Type :: atom(), Reason :: string(), Args :: term()};
+        (amqqueue:amqqueue(), 'true' , 'true', rabbit_types:username()) ->
+            qlen() |
+            rabbit_types:error('in_use') |
+            rabbit_types:error('not_empty') |
+            {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+%% Deletes a queue, optionally only if unused and/or empty.
+delete(Q, IfUnused, IfEmpty, ActingUser) ->
+    rabbit_queue_type:delete(Q, IfUnused, IfEmpty, ActingUser).
+
+%% delete_crashed* INCLUDED FOR BACKWARDS COMPATBILITY REASONS
+delete_crashed(Q) when ?amqqueue_is_classic(Q) ->
+    rabbit_classic_queue:delete_crashed(Q).
+
+delete_crashed(Q, ActingUser) when ?amqqueue_is_classic(Q) ->
+    rabbit_classic_queue:delete_crashed(Q, ActingUser).
+
+-spec delete_crashed_internal(amqqueue:amqqueue(), rabbit_types:username()) -> 'ok'.
+delete_crashed_internal(Q, ActingUser) when ?amqqueue_is_classic(Q) ->
+    rabbit_classic_queue:delete_crashed_internal(Q, ActingUser).
+
+-spec purge(amqqueue:amqqueue()) -> qlen().
+%% Discards all ready messages from the queue.
+purge(Q) when ?is_amqqueue(Q) ->
+    rabbit_queue_type:purge(Q).
+
+-spec requeue(name(),
+              {rabbit_fifo:consumer_tag(), [msg_id()]},
+              rabbit_queue_type:state()) ->
+          {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()}.
+%% Returns the given message ids to the queue (reject with requeue).
+requeue(QRef, {CTag, MsgIds}, QStates) ->
+    reject(QRef, true, {CTag, MsgIds}, QStates).
+
+-spec ack(name(),
+          {rabbit_fifo:consumer_tag(), [msg_id()]},
+          rabbit_queue_type:state()) ->
+          {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()}.
+%% Acknowledges (settles as complete) the given message ids.
+ack(QPid, {CTag, MsgIds}, QueueStates) ->
+    rabbit_queue_type:settle(QPid, complete, CTag, MsgIds, QueueStates).
+
+
+-spec reject(name(),
+             boolean(),
+             {rabbit_fifo:consumer_tag(), [msg_id()]},
+             rabbit_queue_type:state()) ->
+          {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()}.
+%% Rejects the given message ids, either requeueing or discarding them.
+reject(QRef, true, {CTag, MsgIds}, QStates) ->
+    rabbit_queue_type:settle(QRef, requeue, CTag, MsgIds, QStates);
+reject(QRef, false, {CTag, MsgIds}, QStates) ->
+    rabbit_queue_type:settle(QRef, discard, CTag, MsgIds, QStates).
+
+-spec notify_down_all(qpids(), pid()) -> ok_or_errors().
+%% Notifies the given queue processes that channel ChPid went down.
+notify_down_all(QPids, ChPid) ->
+    notify_down_all(QPids, ChPid, ?CHANNEL_OPERATION_TIMEOUT).
+
+-spec notify_down_all(qpids(), pid(), non_neg_integer()) ->
+          ok_or_errors().
+%% As notify_down_all/2 with an explicit timeout. The local rpc:call
+%% wrapper exists to bound the otherwise-infinite delegate call; only
+%% abnormal exits from the queues are reported as errors.
+notify_down_all(QPids, ChPid, Timeout) ->
+    case rpc:call(node(), delegate, invoke,
+                  [QPids, {gen_server2, call, [{notify_down, ChPid}, infinity]}], Timeout) of
+        {badrpc, timeout} -> {error, {channel_operation_timeout, Timeout}};
+        {badrpc, Reason}  -> {error, Reason};
+        {_, Bads} ->
+            case lists:filter(
+                   fun ({_Pid, {exit, {R, _}, _}}) ->
+                           rabbit_misc:is_abnormal_exit(R);
+                       ({_Pid, _}) -> false
+                   end, Bads) of
+                []    -> ok;
+                Bads1 -> {error, Bads1}
+            end;
+        Error -> {error, Error}
+    end.
+
+-spec activate_limit_all(qpids(), pid()) -> ok.
+
+%% Activates the channel limiter on all classic queue pids in QRefs
+%% (quorum queue refs are filtered out - limiters are classic-only).
+activate_limit_all(QRefs, ChPid) ->
+    QPids = [P || P <- QRefs, ?IS_CLASSIC(P)],
+    delegate:invoke_no_result(QPids, {gen_server2, cast,
+                                      [{activate_limit, ChPid}]}).
+
+-spec credit(amqqueue:amqqueue(),
+             rabbit_types:ctag(),
+             non_neg_integer(),
+             boolean(),
+             rabbit_queue_type:state()) ->
+          {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()}.
+%% Grants consumer credit (AMQP 1.0-style flow control).
+credit(Q, CTag, Credit, Drain, QStates) ->
+    rabbit_queue_type:credit(Q, CTag, Credit, Drain, QStates).
+
+-spec basic_get(amqqueue:amqqueue(), boolean(), pid(), rabbit_types:ctag(),
+                rabbit_queue_type:state()) ->
+          {'ok', non_neg_integer(), qmsg(), rabbit_queue_type:state()} |
+          {'empty', rabbit_queue_type:state()} |
+          {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+%% Synchronously fetches a single message (basic.get).
+basic_get(Q, NoAck, LimiterPid, CTag, QStates0) ->
+    rabbit_queue_type:dequeue(Q, NoAck, LimiterPid, CTag, QStates0).
+
+
+-spec basic_consume(amqqueue:amqqueue(), boolean(), pid(), pid(), boolean(),
+                    non_neg_integer(), rabbit_types:ctag(), boolean(),
+                    rabbit_framing:amqp_table(), any(), rabbit_types:username(),
+                    rabbit_queue_type:state()) ->
+          {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()} |
+          {error, term()} |
+          {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+%% Registers a consumer on the queue. Validates consume arguments first;
+%% the queue type may perform further validation of its own.
+basic_consume(Q, NoAck, ChPid, LimiterPid,
+              LimiterActive, ConsumerPrefetchCount, ConsumerTag,
+              ExclusiveConsume, Args, OkMsg, ActingUser, Contexts) ->
+
+    QName = amqqueue:get_name(Q),
+    %% first phase argument validation
+    %% each queue type may do further validations
+    ok = check_consume_arguments(QName, Args),
+    Spec = #{no_ack => NoAck,
+             channel_pid => ChPid,
+             limiter_pid => LimiterPid,
+             limiter_active => LimiterActive,
+             prefetch_count => ConsumerPrefetchCount,
+             consumer_tag => ConsumerTag,
+             exclusive_consume => ExclusiveConsume,
+             args => Args,
+             ok_msg => OkMsg,
+             acting_user => ActingUser},
+    rabbit_queue_type:consume(Q, Spec, Contexts).
+
+-spec basic_cancel(amqqueue:amqqueue(), rabbit_types:ctag(), any(),
+                   rabbit_types:username(),
+                   rabbit_queue_type:state()) ->
+          {ok, rabbit_queue_type:state()} | {error, term()}.
+%% Cancels a consumer previously registered with basic_consume/12.
+basic_cancel(Q, ConsumerTag, OkMsg, ActingUser, QStates) ->
+    rabbit_queue_type:cancel(Q, ConsumerTag,
+                             OkMsg, ActingUser, QStates).
+
+-spec notify_decorators(amqqueue:amqqueue()) -> 'ok'.
+
+%% Asks the queue process to re-run its decorator callbacks.
+notify_decorators(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    delegate:invoke_no_result(QPid, {gen_server2, cast, [notify_decorators]}).
+
+notify_sent(QPid, ChPid) ->
+    rabbit_amqqueue_common:notify_sent(QPid, ChPid).
+
+notify_sent_queue_down(QPid) ->
+    rabbit_amqqueue_common:notify_sent_queue_down(QPid).
+
+-spec resume(pid(), pid()) -> 'ok'.
+
+%% Resumes message delivery to channel ChPid after flow control.
+resume(QPid, ChPid) -> delegate:invoke_no_result(QPid, {gen_server2, cast,
+                                                        [{resume, ChPid}]}).
+
+internal_delete1(QueueName, OnlyDurable) ->
+    internal_delete1(QueueName, OnlyDurable, normal).
+
+%% Removes the queue records inside an already-running Mnesia transaction
+%% and returns the binding deletions to be processed by the caller.
+%% For auto-delete the durable record is only removed if present (it may
+%% legitimately be absent for a transient auto-delete queue).
+internal_delete1(QueueName, OnlyDurable, Reason) ->
+    ok = mnesia:delete({rabbit_queue, QueueName}),
+    case Reason of
+        auto_delete ->
+            case mnesia:wread({rabbit_durable_queue, QueueName}) of
+                []  -> ok;
+                [_] -> ok = mnesia:delete({rabbit_durable_queue, QueueName})
+            end;
+        _ ->
+            mnesia:delete({rabbit_durable_queue, QueueName})
+    end,
+    %% we want to execute some things, as decided by rabbit_exchange,
+    %% after the transaction.
+    rabbit_binding:remove_for_destination(QueueName, OnlyDurable).
+
+-spec internal_delete(name(), rabbit_types:username()) -> 'ok'.
+
+internal_delete(QueueName, ActingUser) ->
+    internal_delete(QueueName, ActingUser, normal).
+
+%% Deletes the queue records transactionally; metrics cleanup and the
+%% queue_deleted event are emitted in the post-transaction tail so they
+%% only happen if the transaction commits.
+internal_delete(QueueName, ActingUser, Reason) ->
+    rabbit_misc:execute_mnesia_tx_with_tail(
+      fun () ->
+              case {mnesia:wread({rabbit_queue, QueueName}),
+                    mnesia:wread({rabbit_durable_queue, QueueName})} of
+                  {[], []} ->
+                      rabbit_misc:const(ok);
+                  _ ->
+                      Deletions = internal_delete1(QueueName, false, Reason),
+                      T = rabbit_binding:process_deletions(Deletions,
+                                                           ?INTERNAL_USER),
+                      fun() ->
+                              ok = T(),
+                              rabbit_core_metrics:queue_deleted(QueueName),
+                              ok = rabbit_event:notify(queue_deleted,
+                                                       [{name, QueueName},
+                                                        {user_who_performed_action, ActingUser}])
+                      end
+              end
+      end).
+
+-spec forget_all_durable(node()) -> 'ok'.
+
+%% Drops (or re-homes, see forget_node_for_queue/2) all durable queues of
+%% a node being removed from the cluster.
+forget_all_durable(Node) ->
+    %% Note rabbit is not running so we avoid e.g. the worker pool. Also why
+    %% we don't invoke the return from rabbit_binding:process_deletions/1.
+    {atomic, ok} =
+        mnesia:sync_transaction(
+          fun () ->
+                  Qs = mnesia:match_object(rabbit_durable_queue,
+                                           amqqueue:pattern_match_all(), write),
+                  [forget_node_for_queue(Node, Q) ||
+                      Q <- Qs,
+                      is_local_to_node(amqqueue:get_pid(Q), Node)],
+                  ok
+          end),
+    ok.
+
+%% Try to promote a mirror while down - it should recover as a
+%% master. We try to take the oldest mirror here for best chance of
+%% recovery.
+forget_node_for_queue(_DeadNode, Q)
+  when ?amqqueue_is_quorum(Q) ->
+    ok;
+forget_node_for_queue(DeadNode, Q) ->
+    RS = amqqueue:get_recoverable_slaves(Q),
+    forget_node_for_queue(DeadNode, RS, Q).
+
+forget_node_for_queue(_DeadNode, [], Q) ->
+    %% No mirrors to recover from, queue is gone.
+    %% Don't process_deletions since that just calls callbacks and we
+    %% are not really up.
+    Name = amqqueue:get_name(Q),
+    internal_delete1(Name, true);
+
+%% Should not happen, but let's be conservative.
+forget_node_for_queue(DeadNode, [DeadNode | T], Q) ->
+    forget_node_for_queue(DeadNode, T, Q);
+
+%% Re-home the queue on the first recoverable mirror node that is safely
+%% offline; keep scanning otherwise.
+forget_node_for_queue(DeadNode, [H|T], Q) when ?is_amqqueue(Q) ->
+    Type = amqqueue:get_type(Q),
+    case {node_permits_offline_promotion(H), Type} of
+        {false, _} -> forget_node_for_queue(DeadNode, T, Q);
+        {true, rabbit_classic_queue} ->
+            Q1 = amqqueue:set_pid(Q, rabbit_misc:node_to_fake_pid(H)),
+            ok = mnesia:write(rabbit_durable_queue, Q1, write);
+        {true, rabbit_quorum_queue} ->
+            ok
+    end.
+
+%% A node may host a promoted (offline) master only under the conditions
+%% explained in notes [1] and [2] below.
+node_permits_offline_promotion(Node) ->
+    case node() of
+        Node -> not rabbit:is_running(); %% [1]
+        _    -> All = rabbit_mnesia:cluster_nodes(all),
+                Running = rabbit_nodes:all_running(),
+                lists:member(Node, All) andalso
+                    not lists:member(Node, Running) %% [2]
+    end.
+%% [1] In this case if we are a real running node (i.e. rabbitmqctl
+%% has RPCed into us) then we cannot allow promotion. If on the other
+%% hand we *are* rabbitmqctl impersonating the node for offline
+%% node-forgetting then we can.
+%%
+%% [2] This is simpler; as long as it's down that's OK
+
+-spec run_backing_queue
+        (pid(), atom(), (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) ->
+            'ok'.
+
+%% Asks the queue process to run Fun against its backing queue module.
+run_backing_queue(QPid, Mod, Fun) ->
+    gen_server2:cast(QPid, {run_backing_queue, Mod, Fun}).
+
+-spec set_ram_duration_target(pid(), number() | 'infinity') -> 'ok'.
+
+%% Sets the target RAM duration (memory pressure control) on the queue.
+set_ram_duration_target(QPid, Duration) ->
+    gen_server2:cast(QPid, {set_ram_duration_target, Duration}).
+
+-spec set_maximum_since_use(pid(), non_neg_integer()) -> 'ok'.
+
+%% File-handle cache housekeeping: sets the max age since last use.
+set_maximum_since_use(QPid, Age) ->
+    gen_server2:cast(QPid, {set_maximum_since_use, Age}).
+
+-spec update_mirroring(pid()) -> 'ok'.
+
+%% Asks the queue to re-evaluate its mirroring configuration.
+update_mirroring(QPid) ->
+    ok = delegate:invoke_no_result(QPid, {gen_server2, cast, [update_mirroring]}).
+
+-spec sync_mirrors(amqqueue:amqqueue() | pid()) ->
+          'ok' | rabbit_types:error('not_mirrored').
+
+%% Triggers explicit mirror synchronisation; accepts either a queue
+%% record (from which the pid is extracted) or the master pid directly.
+sync_mirrors(Q) when ?is_amqqueue(Q) ->
+    sync_mirrors(amqqueue:get_pid(Q));
+sync_mirrors(QPid) ->
+    delegate:invoke(QPid, {gen_server2, call, [sync_mirrors, infinity]}).
+
+-spec cancel_sync_mirrors(amqqueue:amqqueue() | pid()) ->
+          'ok' | {'ok', 'not_syncing'}.
+
+%% Cancels an in-progress mirror synchronisation; same argument
+%% convention as sync_mirrors/1.
+cancel_sync_mirrors(Q) when ?is_amqqueue(Q) ->
+    cancel_sync_mirrors(amqqueue:get_pid(Q));
+cancel_sync_mirrors(QPid) ->
+    delegate:invoke(QPid, {gen_server2, call, [cancel_sync_mirrors, infinity]}).
+
+-spec is_replicated(amqqueue:amqqueue()) -> boolean().
+
+%% Quorum queues are replicated by definition; classic queues only when
+%% a mirroring policy applies.
+is_replicated(Q) when ?amqqueue_is_quorum(Q) ->
+    true;
+is_replicated(Q) ->
+    rabbit_mirror_queue_misc:is_mirrored(Q).
+
+is_exclusive(Q) when ?amqqueue_exclusive_owner_is(Q, none) ->
+    false;
+is_exclusive(Q) when ?amqqueue_exclusive_owner_is_pid(Q) ->
+    true.
+
+is_not_exclusive(Q) ->
+    not is_exclusive(Q).
+
+%% True when the queue is exclusive and its process is no longer alive.
+is_dead_exclusive(Q) when ?amqqueue_exclusive_owner_is(Q, none) ->
+    false;
+is_dead_exclusive(Q) when ?amqqueue_exclusive_owner_is_pid(Q) ->
+    Pid = amqqueue:get_pid(Q),
+    not rabbit_mnesia:is_process_alive(Pid).
+
+-spec has_synchronised_mirrors_online(amqqueue:amqqueue()) -> boolean().
+has_synchronised_mirrors_online(Q) ->
+    %% a queue with all mirrors down would have no mirror pids.
+    %% We treat these as in sync intentionally to avoid false positives.
+    MirrorPids = amqqueue:get_sync_slave_pids(Q),
+    MirrorPids =/= [] andalso lists:any(fun rabbit_misc:is_process_alive/1, MirrorPids).
+
+-spec on_node_up(node()) -> 'ok'.
+
+%% Cluster membership callback: clears Node from recoverable-mirror lists
+%% where appropriate (see maybe_clear_recoverable_node/2).
+on_node_up(Node) ->
+    ok = rabbit_misc:execute_mnesia_transaction(
+           fun () ->
+                   Qs = mnesia:match_object(rabbit_queue,
+                                            amqqueue:pattern_match_all(), write),
+                   [maybe_clear_recoverable_node(Node, Q) || Q <- Qs],
+                   ok
+           end).
+
+%% Removes Node from Q's recoverable-mirrors list unless the mirror on
+%% that node has already re-registered (see the race described below).
+maybe_clear_recoverable_node(Node, Q) ->
+    SPids = amqqueue:get_sync_slave_pids(Q),
+    RSs = amqqueue:get_recoverable_slaves(Q),
+    case lists:member(Node, RSs) of
+        true  ->
+            %% There is a race with
+            %% rabbit_mirror_queue_slave:record_synchronised/1 called
+            %% by the incoming mirror node and this function, called
+            %% by the master node. If this function is executed after
+            %% record_synchronised/1, the node is erroneously removed
+            %% from the recoverable mirrors list.
+            %%
+            %% We check if the mirror node's queue PID is alive. If it is
+            %% the case, then this function is executed after. In this
+            %% situation, we don't touch the queue record, it is already
+            %% correct.
+            DoClearNode =
+                case [SP || SP <- SPids, node(SP) =:= Node] of
+                    [SPid] -> not rabbit_misc:is_process_alive(SPid);
+                    _      -> true
+                end,
+            if
+                DoClearNode -> RSs1 = RSs -- [Node],
+                               store_queue(
+                                 amqqueue:set_recoverable_slaves(Q, RSs1));
+                true        -> ok
+            end;
+        false ->
+            ok
+    end.
+
+-spec on_node_down(node()) -> 'ok'.
+
+%% Cluster membership callback: removes queues stranded on the dead node
+%% and emits the corresponding metrics, binding and event notifications.
+on_node_down(Node) ->
+    {QueueNames, QueueDeletions} = delete_queues_on_node_down(Node),
+    notify_queue_binding_deletions(QueueDeletions),
+    rabbit_core_metrics:queues_deleted(QueueNames),
+    notify_queues_deleted(QueueNames),
+    ok.
+
+%% Deletes the affected queues in batches (see partition_queues/1) and
+%% returns {Names, BindingDeletions} for post-processing.
+delete_queues_on_node_down(Node) ->
+    lists:unzip(lists:flatten([
+        rabbit_misc:execute_mnesia_transaction(
+          fun () -> [{Queue, delete_queue(Queue)} || Queue <- Queues] end
+        ) || Queues <- partition_queues(queues_to_delete_when_node_down(Node))
+    ])).
+
+%% Removes one queue record plus its transient bindings; must run inside
+%% a Mnesia transaction.
+delete_queue(QueueName) ->
+    ok = mnesia:delete({rabbit_queue, QueueName}),
+    rabbit_binding:remove_transient_for_destination(QueueName).
+
+% If there are many queues and we delete them all in a single Mnesia transaction,
+% this can block all other Mnesia operations for a really long time.
+% In situations where a node wants to (re-)join a cluster,
+% Mnesia won't be able to sync on the new node until this operation finishes.
+% As a result, we want to have multiple Mnesia transactions so that other
+% operations can make progress in between these queue delete transactions.
+%
+% 10 queues per Mnesia transaction is an arbitrary number, but it seems to work OK with 50k queues per node.
+partition_queues(Qs) ->
+    partition_queues(Qs, []).
+
+%% Accumulates chunks of ten in reverse and flips the chunk list once at
+%% the end, keeping the overall pass O(n).
+partition_queues([Q0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9 | Rest], Acc) ->
+    partition_queues(Rest, [[Q0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9] | Acc]);
+partition_queues(Tail, Acc) ->
+    lists:reverse([Tail | Acc]).
+
+%% Names of queues that should be removed when NodeDown goes down: queues
+%% homed on that node whose process is gone and which are either
+%% unreplicated or dead exclusive queues.
+queues_to_delete_when_node_down(NodeDown) ->
+    rabbit_misc:execute_mnesia_transaction(fun () ->
+        qlc:e(qlc:q([amqqueue:get_name(Q) ||
+                Q <- mnesia:table(rabbit_queue),
+                amqqueue:qnode(Q) == NodeDown andalso
+                not rabbit_mnesia:is_process_alive(amqqueue:get_pid(Q)) andalso
+                (not rabbit_amqqueue:is_replicated(Q) orelse
+                rabbit_amqqueue:is_dead_exclusive(Q))]
+        ))
+    end).
+
+%% Combines all binding deletions into one and processes them in a single
+%% post-transaction tail.
+notify_queue_binding_deletions(QueueDeletions) ->
+    rabbit_misc:execute_mnesia_tx_with_tail(
+        fun() ->
+            rabbit_binding:process_deletions(
+                lists:foldl(
+                    fun rabbit_binding:combine_deletions/2,
+                    rabbit_binding:new_deletions(),
+                    QueueDeletions
+                ),
+                ?INTERNAL_USER
+            )
+        end
+    ).
+
+%% Emits a queue_deleted event for each removed queue name.
+notify_queues_deleted(QueueDeletions) ->
+    lists:foreach(
+      fun(Queue) ->
+              ok = rabbit_event:notify(queue_deleted,
+                                       [{name, Queue},
+                                        {user, ?INTERNAL_USER}])
+      end,
+      QueueDeletions).
+
+-spec pseudo_queue(name(), pid()) -> amqqueue:amqqueue().
+
+pseudo_queue(QueueName, Pid) ->
+    pseudo_queue(QueueName, Pid, false).
+
+-spec pseudo_queue(name(), pid(), boolean()) -> amqqueue:amqqueue().
+
+%% Builds a minimal, non-exclusive classic queue record (no vhost, no
+%% arguments) - used where a placeholder amqqueue record is needed.
+pseudo_queue(#resource{kind = queue} = QueueName, Pid, Durable)
+  when is_pid(Pid) andalso
+       is_boolean(Durable) ->
+    amqqueue:new(QueueName,
+                 Pid,
+                 Durable,
+                 false,
+                 none, % Owner,
+                 [],
+                 undefined, % VHost,
+                 #{user => undefined}, % ActingUser
+                 rabbit_classic_queue % Type
+                ).
+
+-spec immutable(amqqueue:amqqueue()) -> amqqueue:amqqueue().
+
+%% Strips the mutable parts of a queue record (see amqqueue:set_immutable/1).
+immutable(Q) -> amqqueue:set_immutable(Q).
+
+-spec deliver([amqqueue:amqqueue()], rabbit_types:delivery()) -> 'ok'.
+
+%% Stateless delivery of a message to a set of queues; any per-queue
+%% result from the queue type is deliberately discarded.
+deliver(Qs, Delivery) ->
+    _ = rabbit_queue_type:deliver(Qs, Delivery, stateless),
+    ok.
+
+%% Member nodes of a quorum queue, read from its type state. Returns []
+%% when the type state is absent or carries no node list.
+get_quorum_nodes(Q) ->
+    TypeState = amqqueue:get_type_state(Q),
+    if
+        is_map(TypeState) -> maps:get(nodes, TypeState, []);
+        true              -> []
+    end.
diff --git a/deps/rabbit/src/rabbit_amqqueue_process.erl b/deps/rabbit/src/rabbit_amqqueue_process.erl
new file mode 100644
index 0000000000..abad3b5ad4
--- /dev/null
+++ b/deps/rabbit/src/rabbit_amqqueue_process.erl
@@ -0,0 +1,1849 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_amqqueue_process).
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include_lib("rabbit_common/include/rabbit_framing.hrl").
+-include("amqqueue.hrl").
+
+-behaviour(gen_server2).
+
+-define(SYNC_INTERVAL, 200). %% milliseconds
+-define(RAM_DURATION_UPDATE_INTERVAL, 5000).
+-define(CONSUMER_BIAS_RATIO, 2.0). %% i.e. consume 100% faster
+
+-export([info_keys/0]).
+
+-export([init_with_backing_queue_state/7]).
+
+-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2,
+ handle_info/2, handle_pre_hibernate/1, prioritise_call/4,
+ prioritise_cast/3, prioritise_info/3, format_message_queue/2]).
+-export([format/1]).
+-export([is_policy_applicable/2]).
+
+%% Queue's state
+-record(q, {
+ %% an #amqqueue record
+ q :: amqqueue:amqqueue(),
+ %% none | {exclusive consumer channel PID, consumer tag} | {single active consumer channel PID, consumer}
+ active_consumer,
+ %% Set to true if a queue has ever had a consumer.
+ %% This is used to determine when to delete auto-delete queues.
+ has_had_consumers,
+ %% backing queue module.
+ %% for mirrored queues, this will be rabbit_mirror_queue_master.
+ %% for non-priority and non-mirrored queues, rabbit_variable_queue.
+ %% see rabbit_backing_queue.
+ backing_queue,
+ %% backing queue state.
+ %% see rabbit_backing_queue, rabbit_variable_queue.
+ backing_queue_state,
+ %% consumers state, see rabbit_queue_consumers
+ consumers,
+ %% queue expiration value
+ expires,
+ %% timer used to periodically sync (flush) queue index
+ sync_timer_ref,
+ %% timer used to update ingress/egress rates and queue RAM duration target
+ rate_timer_ref,
+ %% timer used to clean up this queue due to TTL (on when unused)
+ expiry_timer_ref,
+ %% stats emission timer
+ stats_timer,
+ %% maps message IDs to {channel pid, MsgSeqNo}
+ %% pairs
+ msg_id_to_channel,
+ %% message TTL value
+ ttl,
+ %% timer used to delete expired messages
+ ttl_timer_ref,
+ ttl_timer_expiry,
+ %% Keeps track of channels that publish to this queue.
+ %% When channel process goes down, queues have to perform
+ %% certain cleanup.
+ senders,
+ %% dead letter exchange as a #resource record, if any
+ dlx,
+ dlx_routing_key,
+ %% max length in messages, if configured
+ max_length,
+ %% max length in bytes, if configured
+ max_bytes,
+ %% an action to perform if queue is to be over a limit,
+ %% can be either drop-head (default), reject-publish or reject-publish-dlx
+ overflow,
+ %% when policies change, this version helps queue
+ %% determine what previously scheduled/set up state to ignore,
+ %% e.g. message expiration messages from previously set up timers
+ %% that may or may not be still valid
+ args_policy_version,
+ %% used to discard outdated/superseded policy updates,
+ %% e.g. when policies are applied concurrently. See
+ %% https://github.com/rabbitmq/rabbitmq-server/issues/803 for one
+ %% example.
+ mirroring_policy_version = 0,
+ %% running | flow | idle
+ status,
+ %% true | false
+ single_active_consumer_on
+ }).
+
+%%----------------------------------------------------------------------------
+
+%% Statistics-oriented info keys (see statistics_keys/0).
+-define(STATISTICS_KEYS,
+        [messages_ready,
+         messages_unacknowledged,
+         messages,
+         reductions,
+         name,
+         policy,
+         operator_policy,
+         effective_policy_definition,
+         exclusive_consumer_pid,
+         exclusive_consumer_tag,
+         single_active_consumer_pid,
+         single_active_consumer_tag,
+         consumers,
+         consumer_utilisation,
+         memory,
+         slave_pids,
+         synchronised_slave_pids,
+         recoverable_slaves,
+         state,
+         garbage_collection
+        ]).
+
+%% Keys emitted with the queue_created event.
+-define(CREATION_EVENT_KEYS,
+        [name,
+         durable,
+         auto_delete,
+         arguments,
+         owner_pid,
+         exclusive,
+         user_who_performed_action
+        ]).
+
+%% All info keys: pid plus the creation and statistics keys; 'name' is
+%% removed from the statistics part as it already appears in the
+%% creation keys.
+-define(INFO_KEYS, [pid | ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [name]]).
+
+%%----------------------------------------------------------------------------
+
+-spec info_keys() -> rabbit_types:info_keys().
+
+%% Full and statistics-only info key lists, both extended with the
+%% backing queue's own info keys.
+info_keys()       -> ?INFO_KEYS       ++ rabbit_backing_queue:info_keys().
+statistics_keys() -> ?STATISTICS_KEYS ++ rabbit_backing_queue:info_keys().
+
+%%----------------------------------------------------------------------------
+
+%% gen_server2 init/1: trap exits so terminate/2 always runs, label the
+%% process with the queue name, record our own pid in the amqqueue
+%% record, and start hibernated with a backoff hint.
+init(Q) ->
+    process_flag(trap_exit, true),
+    ?store_proc_name(amqqueue:get_name(Q)),
+    {ok, init_state(amqqueue:set_pid(Q, self())), hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE},
+     ?MODULE}.
+
+%% Builds the initial #q state: reads the x-single-active-consumer
+%% queue argument and initialises the stats emission timer. The backing
+%% queue module/state are filled in later (init_it2 or
+%% init_with_backing_queue_state).
+init_state(Q) ->
+    SingleActiveConsumerOn = case rabbit_misc:table_lookup(amqqueue:get_arguments(Q), <<"x-single-active-consumer">>) of
+        {bool, true} -> true;
+        _ -> false
+    end,
+    State = #q{q = Q,
+               active_consumer = none,
+               has_had_consumers = false,
+               consumers = rabbit_queue_consumers:new(),
+               senders = pmon:new(delegate),
+               msg_id_to_channel = #{},
+               status = running,
+               args_policy_version = 0,
+               overflow = 'drop-head',
+               single_active_consumer_on = SingleActiveConsumerOn},
+    rabbit_event:init_stats_timer(State, #q.stats_timer).
+
+%% Continues initialisation once declare/recovery starts. The common
+%% case — no exclusive owner — goes straight to init_it2/3.
+init_it(Recover, From, State = #q{q = Q})
+  when ?amqqueue_exclusive_owner_is(Q, none) ->
+    init_it2(Recover, From, State);
+
+%% You used to be able to declare an exclusive durable queue. Sadly we
+%% need to still tidy up after that case, there could be the remnants
+%% of one left over from an upgrade. So that's why we don't enforce
+%% Recover = new here.
+init_it(Recover, From, State = #q{q = Q0}) ->
+    Owner = amqqueue:get_exclusive_owner(Q0),
+    case rabbit_misc:is_process_alive(Owner) of
+        true -> erlang:monitor(process, Owner),
+                init_it2(Recover, From, State);
+        false -> #q{backing_queue = undefined,
+                    backing_queue_state = undefined,
+                    q = Q} = State,
+                 send_reply(From, {owner_died, Q}),
+                 BQ = backing_queue_module(Q),
+                 {_, Terms} = recovery_status(Recover),
+                 %% init the backing queue only so terminate can clean up
+                 BQS = bq_init(BQ, Q, Terms),
+                 %% Rely on terminate to delete the queue.
+                 log_delete_exclusive(Owner, State),
+                 {stop, {shutdown, missing_owner},
+                  State#q{backing_queue = BQ, backing_queue_state = BQS}}
+    end.
+
+%% Declares (or re-declares on recovery) the queue in the schema, then
+%% finishes process setup: registers FHC/memory-monitor callbacks,
+%% initialises the backing queue, replies to the declaring channel,
+%% waits for the recovery barrier, applies args/policy, and emits the
+%% queue_created event. Stops normally if an equivalent queue already
+%% exists ({existing, Q1}) or the declare failed.
+init_it2(Recover, From, State = #q{q = Q,
+                                   backing_queue = undefined,
+                                   backing_queue_state = undefined}) ->
+    {Barrier, TermsOrNew} = recovery_status(Recover),
+    case rabbit_amqqueue:internal_declare(Q, Recover /= new) of
+        {Res, Q1}
+          when ?is_amqqueue(Q1) andalso
+               (Res == created orelse Res == existing) ->
+            case matches(Recover, Q, Q1) of
+                true ->
+                    ok = file_handle_cache:register_callback(
+                           rabbit_amqqueue, set_maximum_since_use, [self()]),
+                    ok = rabbit_memory_monitor:register(
+                           self(), {rabbit_amqqueue,
+                                    set_ram_duration_target, [self()]}),
+                    BQ = backing_queue_module(Q1),
+                    BQS = bq_init(BQ, Q, TermsOrNew),
+                    %% reply before the barrier so the declaring channel
+                    %% is unblocked while recovery completes
+                    send_reply(From, {new, Q}),
+                    recovery_barrier(Barrier),
+                    State1 = process_args_policy(
+                               State#q{backing_queue = BQ,
+                                       backing_queue_state = BQS}),
+                    notify_decorators(startup, State),
+                    rabbit_event:notify(queue_created,
+                                        infos(?CREATION_EVENT_KEYS, State1)),
+                    rabbit_event:if_enabled(State1, #q.stats_timer,
+                                            fun() -> emit_stats(State1) end),
+                    noreply(State1);
+                false ->
+                    {stop, normal, {existing, Q1}, State}
+            end;
+        Err ->
+            {stop, normal, Err, State}
+    end.
+
+%% Normalises the Recover argument into {Barrier, Terms}: a fresh
+%% declare has no recovery barrier and no stored terms.
+recovery_status(new) -> {no_barrier, new};
+recovery_status({Recover, Terms}) -> {Recover, Terms}.
+
+%% Replies to the declaring channel unless the queue was started
+%% without a caller to answer (From = none).
+send_reply(none, _Q) -> ok;
+send_reply(From, Q) -> gen_server2:reply(From, Q).
+
+%% For a fresh declare, the existing queue must match on every
+%% client-visible property before we treat it as "the same queue".
+matches(new, Q1, Q2) ->
+    %% i.e. not policy
+    amqqueue:get_name(Q1) =:= amqqueue:get_name(Q2) andalso
+    amqqueue:is_durable(Q1) =:= amqqueue:is_durable(Q2) andalso
+    amqqueue:is_auto_delete(Q1) =:= amqqueue:is_auto_delete(Q2) andalso
+    amqqueue:get_exclusive_owner(Q1) =:= amqqueue:get_exclusive_owner(Q2) andalso
+    amqqueue:get_arguments(Q1) =:= amqqueue:get_arguments(Q2) andalso
+    amqqueue:get_pid(Q1) =:= amqqueue:get_pid(Q2) andalso
+    amqqueue:get_slave_pids(Q1) =:= amqqueue:get_slave_pids(Q2);
+%% FIXME: Should v1 vs. v2 of the same record match?
+matches(_, Q, Q) -> true;
+matches(_, _Q, _Q1) -> false.
+
+%% Blocks until the recovery coordinator says 'go' (or dies), so that
+%% queues do not start serving before recovery is complete.
+recovery_barrier(no_barrier) ->
+    ok;
+recovery_barrier(BarrierPid) ->
+    MRef = erlang:monitor(process, BarrierPid),
+    receive
+        {BarrierPid, go} -> erlang:demonitor(MRef, [flush]);
+        {'DOWN', MRef, process, _, _} -> ok
+    end.
+
+-spec init_with_backing_queue_state
+        (amqqueue:amqqueue(), atom(), tuple(), any(),
+         [rabbit_types:delivery()], pmon:pmon(), map()) ->
+            #q{}.
+
+%% Builds queue state from an already-initialised backing queue (e.g.
+%% after a slave is promoted): monitors the exclusive owner if any,
+%% re-applies args/policy, and replays the pending deliveries through
+%% maybe_deliver_or_enqueue with Delivered = true.
+init_with_backing_queue_state(Q, BQ, BQS,
+                              RateTRef, Deliveries, Senders, MTC) ->
+    Owner = amqqueue:get_exclusive_owner(Q),
+    case Owner of
+        none -> ok;
+        _ -> erlang:monitor(process, Owner)
+    end,
+    State = init_state(Q),
+    State1 = State#q{backing_queue = BQ,
+                     backing_queue_state = BQS,
+                     rate_timer_ref = RateTRef,
+                     senders = Senders,
+                     msg_id_to_channel = MTC},
+    State2 = process_args_policy(State1),
+    State3 = lists:foldl(fun (Delivery, StateN) ->
+                             maybe_deliver_or_enqueue(Delivery, true, StateN)
+                         end, State2, Deliveries),
+    notify_decorators(startup, State3),
+    State3.
+
+%% Plain shutdown (e.g. node stop): mark the queue 'stopped' in the
+%% schema and terminate the backing queue, keeping its contents.
+terminate(shutdown = R, State = #q{backing_queue = BQ, q = Q0}) ->
+    QName = amqqueue:get_name(Q0),
+    rabbit_core_metrics:queue_deleted(qname(State)),
+    terminate_shutdown(
+        fun (BQS) ->
+            rabbit_misc:execute_mnesia_transaction(
+                fun() ->
+                    [Q] = mnesia:read({rabbit_queue, QName}),
+                    Q2 = amqqueue:set_state(Q, stopped),
+                    %% amqqueue migration:
+                    %% The amqqueue was read from this transaction, no need
+                    %% to handle migration.
+                    rabbit_amqqueue:store_queue(Q2)
+                end),
+            BQ:terminate(R, BQS)
+        end, State);
+terminate({shutdown, missing_owner} = Reason, State) ->
+    %% if the owner was missing then there will be no queue, so don't emit stats
+    terminate_shutdown(terminate_delete(false, Reason, State), State);
+%% Other {shutdown, _} reasons: terminate the backing queue but keep
+%% the schema entry (contents survive for recovery).
+terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) ->
+    rabbit_core_metrics:queue_deleted(qname(State)),
+    terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State);
+terminate(normal, State = #q{status = {terminated_by, auto_delete}}) ->
+    %% auto_delete case
+    %% To increase performance we want to avoid a mnesia_sync:sync call
+    %% after every transaction, as we could be deleting simultaneously
+    %% thousands of queues. A optimisation introduced by server#1513
+    %% needs to be reverted by this case, avoiding to guard the delete
+    %% operation on `rabbit_durable_queue`
+    terminate_shutdown(terminate_delete(true, auto_delete, State), State);
+terminate(normal, State) -> %% delete case
+    terminate_shutdown(terminate_delete(true, normal, State), State);
+%% If we crashed don't try to clean up the BQS, probably best to leave it.
+terminate(_Reason, State = #q{q = Q}) ->
+    terminate_shutdown(fun (BQS) ->
+        Q2 = amqqueue:set_state(Q, crashed),
+        rabbit_misc:execute_mnesia_transaction(
+            fun() ->
+                ?try_mnesia_tx_or_upgrade_amqqueue_and_retry(
+                    rabbit_amqqueue:store_queue(Q2),
+                    begin
+                        Q3 = amqqueue:upgrade(Q2),
+                        rabbit_amqqueue:store_queue(Q3)
+                    end)
+            end),
+        BQS
+    end, State).
+
+%% Returns the fun terminate_shutdown will run against the backing
+%% queue state: deletes the backing queue contents, optionally emits a
+%% final stats sample, and removes the queue from the schema.
+%% Reason0 = auto_delete is mapped to 'normal' for the backing queue
+%% but kept as-is for internal_delete so the event is attributed right.
+terminate_delete(EmitStats, Reason0,
+                 State = #q{q = Q,
+                            backing_queue = BQ,
+                            status = Status}) ->
+    QName = amqqueue:get_name(Q),
+    ActingUser = terminated_by(Status),
+    fun (BQS) ->
+        Reason = case Reason0 of
+                     auto_delete -> normal;
+                     Any -> Any
+                 end,
+        BQS1 = BQ:delete_and_terminate(Reason, BQS),
+        if EmitStats -> rabbit_event:if_enabled(State, #q.stats_timer,
+                                                fun() -> emit_stats(State) end);
+           true -> ok
+        end,
+        %% This try-catch block transforms throws to errors since throws are not
+        %% logged.
+        try
+            %% don't care if the internal delete doesn't return 'ok'.
+            rabbit_amqqueue:internal_delete(QName, ActingUser, Reason0)
+        catch
+            {error, ReasonE} -> error(ReasonE)
+        end,
+        BQS1
+    end.
+
+%% Maps the queue's status to the user a deletion should be attributed
+%% to; anything other than an explicit {terminated_by, User} is the
+%% internal user.
+terminated_by({terminated_by, auto_delete}) ->
+    ?INTERNAL_USER;
+terminated_by({terminated_by, ActingUser}) ->
+    ActingUser;
+terminated_by(_) ->
+    ?INTERNAL_USER.
+
+%% Common teardown path: stops all timers, then — if the backing queue
+%% was ever initialised — deregisters from the memory monitor, notifies
+%% decorators, emits consumer_deleted events for every remaining
+%% consumer, and finally applies Fun to the backing queue state.
+terminate_shutdown(Fun, #q{status = Status} = State) ->
+    ActingUser = terminated_by(Status),
+    State1 = #q{backing_queue_state = BQS, consumers = Consumers} =
+        lists:foldl(fun (F, S) -> F(S) end, State,
+                    [fun stop_sync_timer/1,
+                     fun stop_rate_timer/1,
+                     fun stop_expiry_timer/1,
+                     fun stop_ttl_timer/1]),
+    case BQS of
+        undefined -> State1;
+        _ -> ok = rabbit_memory_monitor:deregister(self()),
+             QName = qname(State),
+             notify_decorators(shutdown, State),
+             [emit_consumer_deleted(Ch, CTag, QName, ActingUser) ||
+                 {Ch, CTag, _, _, _, _, _, _} <-
+                     rabbit_queue_consumers:all(Consumers)],
+             State1#q{backing_queue_state = Fun(BQS)}
+    end.
+
+%% No state changes across code upgrades.
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+
+%% Notifies decorators of a consumer-state change only when the caller
+%% reports that the active consumer set actually changed.
+maybe_notify_decorators(false, State) -> State;
+maybe_notify_decorators(true, State) -> notify_decorators(State), State.
+
+%% Fires a named decorator event (startup / shutdown / ...) for this queue.
+notify_decorators(Event, State) -> decorator_callback(qname(State), Event, []).
+
+%% Reports the current consumer state (max active priority, emptiness)
+%% to the queue's decorators.
+notify_decorators(State = #q{consumers = Consumers,
+                             backing_queue = BQ,
+                             backing_queue_state = BQS}) ->
+    P = rabbit_queue_consumers:max_active_priority(Consumers),
+    decorator_callback(qname(State), consumer_state_changed,
+                       [P, BQ:is_empty(BQS)]).
+
+%% Calls F(Q, A...) on every decorator module currently selected for
+%% the queue; a vanished queue is silently ignored.
+decorator_callback(QName, F, A) ->
+    %% Look up again in case policy and hence decorators have changed
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} ->
+            Ds = amqqueue:get_decorators(Q),
+            [ok = apply(M, F, [Q|A]) || M <- rabbit_queue_decorator:select(Ds)];
+        {error, not_found} ->
+            ok
+    end.
+
+%% Initialises the backing queue, giving it a callback that runs
+%% arbitrary functions in this queue process via run_backing_queue.
+bq_init(BQ, Q, Recover) ->
+    Self = self(),
+    BQ:init(Q, Recover,
+            fun (Mod, Fun) ->
+                rabbit_amqqueue:run_backing_queue(Self, Mod, Fun)
+            end).
+
+%% (Re)applies all argument/policy-controlled settings. Each table row
+%% is {Name, Resolve, Apply}: Resolve picks between the policy and the
+%% queue-argument value (min for numeric limits, argument-wins for the
+%% rest) and Apply installs it in the state. Bumps args_policy_version
+%% so stale timer messages from the previous configuration are ignored,
+%% then drops already-expired messages under the new settings.
+process_args_policy(State = #q{q = Q,
+                               args_policy_version = N}) ->
+    ArgsTable =
+        [{<<"expires">>, fun res_min/2, fun init_exp/2},
+         {<<"dead-letter-exchange">>, fun res_arg/2, fun init_dlx/2},
+         {<<"dead-letter-routing-key">>, fun res_arg/2, fun init_dlx_rkey/2},
+         {<<"message-ttl">>, fun res_min/2, fun init_ttl/2},
+         {<<"max-length">>, fun res_min/2, fun init_max_length/2},
+         {<<"max-length-bytes">>, fun res_min/2, fun init_max_bytes/2},
+         {<<"overflow">>, fun res_arg/2, fun init_overflow/2},
+         {<<"queue-mode">>, fun res_arg/2, fun init_queue_mode/2}],
+    drop_expired_msgs(
+      lists:foldl(fun({Name, Resolve, Fun}, StateN) ->
+                      Fun(rabbit_queue_type_util:args_policy_lookup(Name, Resolve, Q), StateN)
+                  end, State#q{args_policy_version = N + 1}, ArgsTable)).
+
+%% Conflict resolvers: the queue argument wins outright, or the
+%% stricter (smaller) of policy and argument wins. Note erlang:min
+%% handles 'undefined' via term order (number < atom).
+res_arg(_PolVal, ArgVal) -> ArgVal.
+res_min(PolVal, ArgVal) -> erlang:min(PolVal, ArgVal).
+
+%% In both these we init with the undefined variant first to stop any
+%% existing timer, then start a new one which may fire after a
+%% different time.
+init_exp(undefined, State) -> stop_expiry_timer(State#q{expires = undefined});
+init_exp(Expires, State) -> State1 = init_exp(undefined, State),
+                            ensure_expiry_timer(State1#q{expires = Expires}).
+
+init_ttl(undefined, State) -> stop_ttl_timer(State#q{ttl = undefined});
+init_ttl(TTL, State) -> (init_ttl(undefined, State))#q{ttl = TTL}.
+
+%% Stores the dead-letter exchange as a #resource in the queue's vhost.
+init_dlx(undefined, State) ->
+    State#q{dlx = undefined};
+init_dlx(DLX, State = #q{q = Q}) ->
+    QName = amqqueue:get_name(Q),
+    State#q{dlx = rabbit_misc:r(QName, exchange, DLX)}.
+
+init_dlx_rkey(RoutingKey, State) -> State#q{dlx_routing_key = RoutingKey}.
+
+%% Installing a (possibly lower) length/bytes limit may require
+%% immediately dropping messages from the head.
+init_max_length(MaxLen, State) ->
+    {_Dropped, State1} = maybe_drop_head(State#q{max_length = MaxLen}),
+    State1.
+
+init_max_bytes(MaxBytes, State) ->
+    {_Dropped, State1} = maybe_drop_head(State#q{max_bytes = MaxBytes}),
+    State1.
+
+%% Reset overflow to default 'drop-head' value if it's undefined.
+init_overflow(undefined, #q{overflow = 'drop-head'} = State) ->
+    State;
+init_overflow(undefined, State) ->
+    {_Dropped, State1} = maybe_drop_head(State#q{overflow = 'drop-head'}),
+    State1;
+init_overflow(Overflow, State) ->
+    %% existing atom: the set of valid overflow modes is fixed
+    OverflowVal = binary_to_existing_atom(Overflow, utf8),
+    case OverflowVal of
+        'drop-head' ->
+            {_Dropped, State1} = maybe_drop_head(State#q{overflow = OverflowVal}),
+            State1;
+        _ ->
+            State#q{overflow = OverflowVal}
+    end.
+
+%% Forwards the queue-mode (e.g. lazy) setting to the backing queue.
+init_queue_mode(undefined, State) ->
+    State;
+init_queue_mode(Mode, State = #q {backing_queue = BQ,
+                                  backing_queue_state = BQS}) ->
+    BQS1 = BQ:set_queue_mode(binary_to_existing_atom(Mode, utf8), BQS),
+    State#q{backing_queue_state = BQS1}.
+
+%% Standard wrappers for gen_server2 returns: run the common post-step
+%% (confirms, timers) and re-arm the stats and rate timers.
+reply(Reply, NewState) ->
+    {NewState1, Timeout} = next_state(NewState),
+    {reply, Reply, ensure_stats_timer(ensure_rate_timer(NewState1)), Timeout}.
+
+noreply(NewState) ->
+    {NewState1, Timeout} = next_state(NewState),
+    {noreply, ensure_stats_timer(ensure_rate_timer(NewState1)), Timeout}.
+
+%% Common post-callback step: asserts the queue invariant, drains and
+%% sends any pending publisher confirms, and derives the gen_server2
+%% timeout from the backing queue's needs_timeout answer (hibernate,
+%% the sync interval, or an immediate timeout).
+next_state(State = #q{q = Q,
+                      backing_queue = BQ,
+                      backing_queue_state = BQS,
+                      msg_id_to_channel = MTC}) ->
+    assert_invariant(State),
+    {MsgIds, BQS1} = BQ:drain_confirmed(BQS),
+    MTC1 = confirm_messages(MsgIds, MTC, amqqueue:get_name(Q)),
+    State1 = State#q{backing_queue_state = BQS1, msg_id_to_channel = MTC1},
+    case BQ:needs_timeout(BQS1) of
+        false -> {stop_sync_timer(State1), hibernate };
+        idle -> {stop_sync_timer(State1), ?SYNC_INTERVAL};
+        timed -> {ensure_sync_timer(State1), 0 }
+    end.
+
+%% Picks the backing queue implementation: the configured module
+%% (normally rabbit_variable_queue) for plain queues, or the mirror
+%% queue master for mirrored queues.
+backing_queue_module(Q) ->
+    case rabbit_mirror_queue_misc:is_mirrored(Q) of
+        false -> {ok, BQM} = application:get_env(backing_queue_module),
+                 BQM;
+        true -> rabbit_mirror_queue_master
+    end.
+
+%% Periodic index sync (flush) timer.
+ensure_sync_timer(State) ->
+    rabbit_misc:ensure_timer(State, #q.sync_timer_ref,
+                             ?SYNC_INTERVAL, sync_timeout).
+
+stop_sync_timer(State) -> rabbit_misc:stop_timer(State, #q.sync_timer_ref).
+
+%% Periodic RAM-duration / rate update timer.
+ensure_rate_timer(State) ->
+    rabbit_misc:ensure_timer(State, #q.rate_timer_ref,
+                             ?RAM_DURATION_UPDATE_INTERVAL,
+                             update_ram_duration).
+
+stop_rate_timer(State) -> rabbit_misc:stop_timer(State, #q.rate_timer_ref).
+
+%% We wish to expire only when there are no consumers *and* the expiry
+%% hasn't been refreshed (by queue.declare or basic.get) for the
+%% configured period.
+ensure_expiry_timer(State = #q{expires = undefined}) ->
+    State;
+ensure_expiry_timer(State = #q{expires = Expires,
+                               args_policy_version = Version}) ->
+    case is_unused(State) of
+        true -> NewState = stop_expiry_timer(State),
+                rabbit_misc:ensure_timer(NewState, #q.expiry_timer_ref,
+                                         Expires, {maybe_expire, Version});
+        false -> State
+    end.
+
+stop_expiry_timer(State) -> rabbit_misc:stop_timer(State, #q.expiry_timer_ref).
+
+%% Schedules a {drop_expired, Version} message for the given absolute
+%% expiry (microsecond timestamp). An already-armed timer is only
+%% rescheduled when the new expiry is more than a second earlier.
+ensure_ttl_timer(undefined, State) ->
+    State;
+ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = undefined,
+                                    args_policy_version = Version}) ->
+    After = (case Expiry - os:system_time(micro_seconds) of
+                 V when V > 0 -> V + 999; %% always fire later
+                 _ -> 0
+             end) div 1000,
+    TRef = rabbit_misc:send_after(After, self(), {drop_expired, Version}),
+    State#q{ttl_timer_ref = TRef, ttl_timer_expiry = Expiry};
+ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = TRef,
+                                    ttl_timer_expiry = TExpiry})
+  when Expiry + 1000 < TExpiry ->
+    rabbit_misc:cancel_timer(TRef),
+    ensure_ttl_timer(Expiry, State#q{ttl_timer_ref = undefined});
+ensure_ttl_timer(_Expiry, State) ->
+    State.
+
+stop_ttl_timer(State) -> rabbit_misc:stop_timer(State, #q.ttl_timer_ref).
+
+ensure_stats_timer(State) ->
+    rabbit_event:ensure_stats_timer(State, #q.stats_timer, emit_stats).
+
+%% Invariant: without single-active-consumer, the queue may only hold
+%% messages while all consumers are blocked/inactive.
+assert_invariant(#q{single_active_consumer_on = true}) ->
+    %% queue may contain messages and have available consumers with exclusive consumer
+    ok;
+assert_invariant(State = #q{consumers = Consumers, single_active_consumer_on = false}) ->
+    true = (rabbit_queue_consumers:inactive(Consumers) orelse is_empty(State)).
+
+is_empty(#q{backing_queue = BQ, backing_queue_state = BQS}) -> BQ:is_empty(BQS).
+
+%% If this operation emptied the queue, tell decorators and send
+%% basic.credit drained notifications to consumers.
+maybe_send_drained(WasEmpty, State) ->
+    case (not WasEmpty) andalso is_empty(State) of
+        true -> notify_decorators(State),
+                rabbit_queue_consumers:send_drained();
+        false -> ok
+    end,
+    State.
+
+%% Sends publisher confirms for the given message IDs. Groups the
+%% pending sequence numbers by sender channel so each channel gets a
+%% single confirm message, and removes the confirmed IDs from the
+%% msg-id -> {channel, seqno} map. Returns the updated map.
+confirm_messages([], MTC, _QName) ->
+    MTC;
+confirm_messages(MsgIds, MTC, QName) ->
+    {CMs, MTC1} =
+        lists:foldl(
+            fun(MsgId, {CMs, MTC0}) ->
+                case maps:get(MsgId, MTC0, none) of
+                    none ->
+                        %% not awaiting confirm (e.g. non-persistent publish)
+                        {CMs, MTC0};
+                    {SenderPid, MsgSeqNo} ->
+                        {maps:update_with(SenderPid,
+                                          fun(MsgSeqNos) ->
+                                              [MsgSeqNo | MsgSeqNos]
+                                          end,
+                                          [MsgSeqNo],
+                                          CMs),
+                         maps:remove(MsgId, MTC0)}
+
+                end
+            end, {#{}, MTC}, MsgIds),
+    maps:fold(
+        fun(Pid, MsgSeqNos, _) ->
+            confirm_to_sender(Pid, QName, MsgSeqNos)
+        end,
+        ok,
+        CMs),
+    MTC1.
+
+%% Decides how a delivery's publisher confirm is handled:
+%%   never       - the publish did not request a confirm;
+%%   eventually  - persistent message to a durable queue: record it and
+%%                 confirm once the backing queue reports persistence;
+%%   immediately - anything else can be confirmed right away.
+send_or_record_confirm(#delivery{confirm = false}, State) ->
+    {never, State};
+send_or_record_confirm(#delivery{confirm = true,
+                                 sender = SenderPid,
+                                 msg_seq_no = MsgSeqNo,
+                                 message = #basic_message {
+                                   is_persistent = true,
+                                   id = MsgId}},
+                       State = #q{q = Q,
+                                  msg_id_to_channel = MTC})
+  when ?amqqueue_is_durable(Q) ->
+    MTC1 = maps:put(MsgId, {SenderPid, MsgSeqNo}, MTC),
+    {eventually, State#q{msg_id_to_channel = MTC1}};
+send_or_record_confirm(#delivery{confirm = true,
+                                 sender = SenderPid,
+                                 msg_seq_no = MsgSeqNo},
+                       #q{q = Q} = State) ->
+    confirm_to_sender(SenderPid, amqqueue:get_name(Q), [MsgSeqNo]),
+    {immediately, State}.
+
+%% This feature was used by `rabbit_amqqueue_process` and
+%% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x. It is
+%% unused in 3.8.x and thus deprecated. We keep it to support in-place
+%% upgrades to 3.8.x (i.e. mixed-version clusters), but it is a no-op
+%% starting with that version.
+send_mandatory(#delivery{mandatory = false}) ->
+    ok;
+send_mandatory(#delivery{mandatory = true,
+                         sender = SenderPid,
+                         msg_seq_no = MsgSeqNo}) ->
+    gen_server2:cast(SenderPid, {mandatory_received, MsgSeqNo}).
+
+%% Discards a delivery that will never reach a consumer or the queue:
+%% confirm it to the publisher if requested (a discarded message counts
+%% as handled), then let the backing queue drop it. Returns the updated
+%% backing queue state and confirm map.
+discard(#delivery{confirm = Confirm,
+                  sender = SenderPid,
+                  flow = Flow,
+                  message = #basic_message{id = MsgId}}, BQ, BQS, MTC, QName) ->
+    MTC1 = case Confirm of
+               true -> confirm_messages([MsgId], MTC, QName);
+               false -> MTC
+           end,
+    BQS1 = BQ:discard(MsgId, SenderPid, Flow, BQS),
+    {BQS1, MTC1}.
+
+%% Pushes queued messages to ready consumers until either the queue is
+%% empty or no consumer can take another message, tracking across
+%% iterations whether the active consumer set changed so decorators are
+%% notified at most once at the end.
+run_message_queue(State) -> run_message_queue(false, State).
+
+run_message_queue(ActiveConsumersChanged, State) ->
+    case is_empty(State) of
+        true -> maybe_notify_decorators(ActiveConsumersChanged, State);
+        false -> case rabbit_queue_consumers:deliver(
+                        fun(AckRequired) -> fetch(AckRequired, State) end,
+                        qname(State), State#q.consumers,
+                        State#q.single_active_consumer_on, State#q.active_consumer) of
+                     {delivered, ActiveConsumersChanged1, State1, Consumers} ->
+                         run_message_queue(
+                           ActiveConsumersChanged or ActiveConsumersChanged1,
+                           State1#q{consumers = Consumers});
+                     {undelivered, ActiveConsumersChanged1, Consumers} ->
+                         maybe_notify_decorators(
+                           ActiveConsumersChanged or ActiveConsumersChanged1,
+                           State#q{consumers = Consumers})
+                 end
+    end.
+
+%% Tries to hand a fresh publish straight to a consumer, bypassing the
+%% queue. If a consumer needs an ack, the message goes through
+%% BQ:publish_delivered (queue must be empty for this fast path); if
+%% not, it is discarded immediately. Returns {delivered | undelivered,
+%% State'} so the caller knows whether to enqueue.
+attempt_delivery(Delivery = #delivery{sender = SenderPid,
+                                      flow = Flow,
+                                      message = Message},
+                 Props, Delivered, State = #q{q = Q,
+                                              backing_queue = BQ,
+                                              backing_queue_state = BQS,
+                                              msg_id_to_channel = MTC}) ->
+    case rabbit_queue_consumers:deliver(
+           fun (true) -> true = BQ:is_empty(BQS),
+                         {AckTag, BQS1} =
+                             BQ:publish_delivered(
+                               Message, Props, SenderPid, Flow, BQS),
+                         {{Message, Delivered, AckTag}, {BQS1, MTC}};
+               (false) -> {{Message, Delivered, undefined},
+                           discard(Delivery, BQ, BQS, MTC, amqqueue:get_name(Q))}
+           end, qname(State), State#q.consumers, State#q.single_active_consumer_on, State#q.active_consumer) of
+        {delivered, ActiveConsumersChanged, {BQS1, MTC1}, Consumers} ->
+            {delivered, maybe_notify_decorators(
+                          ActiveConsumersChanged,
+                          State#q{backing_queue_state = BQS1,
+                                  msg_id_to_channel = MTC1,
+                                  consumers = Consumers})};
+        {undelivered, ActiveConsumersChanged, Consumers} ->
+            {undelivered, maybe_notify_decorators(
+                            ActiveConsumersChanged,
+                            State#q{consumers = Consumers})}
+    end.
+
+%% Entry point for a publish: applies the overflow policy first
+%% (reject-publish / reject-publish-dlx short-circuit before the
+%% message is accepted), then asks the backing queue about duplicates,
+%% and only then delivers or enqueues.
+maybe_deliver_or_enqueue(Delivery = #delivery{message = Message},
+                         Delivered,
+                         State = #q{overflow = Overflow,
+                                    backing_queue = BQ,
+                                    backing_queue_state = BQS,
+                                    dlx = DLX,
+                                    dlx_routing_key = RK}) ->
+    send_mandatory(Delivery), %% must do this before confirms
+    case {will_overflow(Delivery, State), Overflow} of
+        {true, 'reject-publish'} ->
+            %% Drop publish and nack to publisher
+            send_reject_publish(Delivery, Delivered, State);
+        {true, 'reject-publish-dlx'} ->
+            %% Publish to DLX
+            with_dlx(
+              DLX,
+              fun (X) ->
+                  QName = qname(State),
+                  rabbit_dead_letter:publish(Message, maxlen, X, RK, QName)
+              end,
+              fun () -> ok end),
+            %% Drop publish and nack to publisher
+            send_reject_publish(Delivery, Delivered, State);
+        _ ->
+            {IsDuplicate, BQS1} = BQ:is_duplicate(Message, BQS),
+            State1 = State#q{backing_queue_state = BQS1},
+            case IsDuplicate of
+                true -> State1;
+                {true, drop} -> State1;
+                %% Drop publish and nack to publisher
+                {true, reject} ->
+                    send_reject_publish(Delivery, Delivered, State1);
+                %% Enqueue and maybe drop head later
+                false ->
+                    deliver_or_enqueue(Delivery, Delivered, State1)
+            end
+    end.
+
+%% Accepts the publish: records/sends the confirm, attempts direct
+%% delivery to a consumer, and otherwise enqueues in the backing queue,
+%% enforcing length limits and (when the head may have changed)
+%% dropping expired messages.
+deliver_or_enqueue(Delivery = #delivery{message = Message,
+                                        sender = SenderPid,
+                                        flow = Flow},
+                   Delivered,
+                   State = #q{q = Q, backing_queue = BQ}) ->
+    {Confirm, State1} = send_or_record_confirm(Delivery, State),
+    Props = message_properties(Message, Confirm, State1),
+    case attempt_delivery(Delivery, Props, Delivered, State1) of
+        {delivered, State2} ->
+            State2;
+        %% The next one is an optimisation
+        %% (ttl = 0 with no DLX: the message would expire immediately
+        %% anyway, so discard instead of enqueueing)
+        {undelivered, State2 = #q{ttl = 0, dlx = undefined,
+                                  backing_queue_state = BQS,
+                                  msg_id_to_channel = MTC}} ->
+            {BQS1, MTC1} = discard(Delivery, BQ, BQS, MTC, amqqueue:get_name(Q)),
+            State2#q{backing_queue_state = BQS1, msg_id_to_channel = MTC1};
+        {undelivered, State2 = #q{backing_queue_state = BQS}} ->
+
+            BQS1 = BQ:publish(Message, Props, Delivered, SenderPid, Flow, BQS),
+            {Dropped, State3 = #q{backing_queue_state = BQS2}} =
+                maybe_drop_head(State2#q{backing_queue_state = BQS1}),
+            QLen = BQ:len(BQS2),
+            %% optimisation: it would be perfectly safe to always
+            %% invoke drop_expired_msgs here, but that is expensive so
+            %% we only do that if a new message that might have an
+            %% expiry ends up at the head of the queue. If the head
+            %% remains unchanged, or if the newly published message
+            %% has no expiry and becomes the head of the queue then
+            %% the call is unnecessary.
+            case {Dropped, QLen =:= 1, Props#message_properties.expiry} of
+                {false, false, _} -> State3;
+                {true, true, undefined} -> State3;
+                {_, _, _} -> drop_expired_msgs(State3)
+            end
+    end.
+
+%% Enforces max-length/max-bytes by dropping from the head, but only
+%% under the 'drop-head' overflow mode; the reject modes handle
+%% overflow before the message is accepted.
+maybe_drop_head(State = #q{max_length = undefined,
+                           max_bytes = undefined}) ->
+    {false, State};
+maybe_drop_head(State = #q{overflow = 'reject-publish'}) ->
+    {false, State};
+maybe_drop_head(State = #q{overflow = 'reject-publish-dlx'}) ->
+    {false, State};
+maybe_drop_head(State = #q{overflow = 'drop-head'}) ->
+    maybe_drop_head(false, State).
+
+%% Repeatedly drops (dead-lettering when a DLX is configured) until the
+%% queue is back under its limits; returns whether anything was dropped.
+maybe_drop_head(AlreadyDropped, State = #q{backing_queue = BQ,
+                                           backing_queue_state = BQS}) ->
+    case over_max_length(State) of
+        true ->
+            maybe_drop_head(true,
+                            with_dlx(
+                              State#q.dlx,
+                              fun (X) -> dead_letter_maxlen_msg(X, State) end,
+                              fun () ->
+                                  {_, BQS1} = BQ:drop(false, BQS),
+                                  State#q{backing_queue_state = BQS1}
+                              end));
+        false ->
+            {AlreadyDropped, State}
+    end.
+
+%% Rejects a publish under 'reject-publish'/'reject-publish-dlx'
+%% overflow: a confirming publisher gets a basic.nack, and the message
+%% is discarded from the backing queue. When the publisher did not
+%% request confirms there is nothing to send — the message is silently
+%% dropped.
+send_reject_publish(#delivery{confirm = true,
+                              sender = SenderPid,
+                              flow = Flow,
+                              msg_seq_no = MsgSeqNo,
+                              message = #basic_message{id = MsgId}},
+                      _Delivered,
+                      State = #q{ q = Q,
+                                  backing_queue = BQ,
+                                  backing_queue_state = BQS,
+                                  msg_id_to_channel = MTC}) ->
+    ok = rabbit_classic_queue:send_rejection(SenderPid,
+                                             amqqueue:get_name(Q), MsgSeqNo),
+
+    MTC1 = maps:remove(MsgId, MTC),
+    BQS1 = BQ:discard(MsgId, SenderPid, Flow, BQS),
+    State#q{ backing_queue_state = BQS1, msg_id_to_channel = MTC1 };
+send_reject_publish(#delivery{confirm = false},
+                      _Delivered, State) ->
+    State.
+
+%% True if accepting this message would push the queue over either its
+%% message-count or byte-size limit (undefined limits never overflow:
+%% any integer compares smaller than the atom 'undefined').
+will_overflow(_, #q{max_length = undefined,
+                    max_bytes = undefined}) -> false;
+will_overflow(#delivery{message = Message},
+              #q{max_length = MaxLen,
+                 max_bytes = MaxBytes,
+                 backing_queue = BQ,
+                 backing_queue_state = BQS}) ->
+    ExpectedQueueLength = BQ:len(BQS) + 1,
+
+    #basic_message{content = #content{payload_fragments_rev = PFR}} = Message,
+    MessageSize = iolist_size(PFR),
+    ExpectedQueueSizeBytes = BQ:info(message_bytes_ready, BQS) + MessageSize,
+
+    ExpectedQueueLength > MaxLen orelse ExpectedQueueSizeBytes > MaxBytes.
+
+%% True if the queue is currently over either limit.
+over_max_length(#q{max_length = MaxLen,
+                   max_bytes = MaxBytes,
+                   backing_queue = BQ,
+                   backing_queue_state = BQS}) ->
+    BQ:len(BQS) > MaxLen orelse BQ:info(message_bytes_ready, BQS) > MaxBytes.
+
+%% Requeues the given acktags, re-applies limits/TTL, notifies drained
+%% state if we emptied, and tries to redeliver to consumers.
+requeue_and_run(AckTags, State = #q{backing_queue = BQ,
+                                    backing_queue_state = BQS}) ->
+    WasEmpty = BQ:is_empty(BQS),
+    {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS),
+    {_Dropped, State1} = maybe_drop_head(State#q{backing_queue_state = BQS1}),
+    run_message_queue(maybe_send_drained(WasEmpty, drop_expired_msgs(State1))).
+
+%% Fetches the next message from the backing queue, dropping expired
+%% messages first and signalling drained when the fetch found nothing.
+fetch(AckRequired, State = #q{backing_queue = BQ,
+                              backing_queue_state = BQS}) ->
+    {Result, BQS1} = BQ:fetch(AckRequired, BQS),
+    State1 = drop_expired_msgs(State#q{backing_queue_state = BQS1}),
+    {Result, maybe_send_drained(Result =:= empty, State1)}.
+
+%% Acknowledges AckTags from channel ChPid (only if the channel
+%% actually holds them — see subtract_acks/4).
+ack(AckTags, ChPid, State) ->
+    subtract_acks(ChPid, AckTags, State,
+                  fun (State1 = #q{backing_queue = BQ,
+                                   backing_queue_state = BQS}) ->
+                      {_Guids, BQS1} = BQ:ack(AckTags, BQS),
+                      State1#q{backing_queue_state = BQS1}
+                  end).
+
+%% Returns AckTags from ChPid to the queue for redelivery.
+requeue(AckTags, ChPid, State) ->
+    subtract_acks(ChPid, AckTags, State,
+                  fun (State1) -> requeue_and_run(AckTags, State1) end).
+
+%% Applies a consumer-state Update for ChPid; if that unblocked any
+%% consumer, restart delivery.
+possibly_unblock(Update, ChPid, State = #q{consumers = Consumers}) ->
+    case rabbit_queue_consumers:possibly_unblock(Update, ChPid, Consumers) of
+        unchanged -> State;
+        {unblocked, Consumers1} -> State1 = State#q{consumers = Consumers1},
+                                   run_message_queue(true, State1)
+    end.
+
+%% A queue should be auto-deleted only when it is flagged auto-delete,
+%% has been consumed from at least once, and now has no consumers left.
+should_auto_delete(#q{q = Q, has_had_consumers = HasHadConsumers} = State) ->
+    case ?amqqueue_is_auto_delete(Q) andalso HasHadConsumers of
+        true  -> is_unused(State);
+        false -> false
+    end.
+
+%% Cleans up after a channel process exits: releases credit-flow state
+%% for a publishing channel, erases its consumers (emitting
+%% consumer_deleted events), picks a replacement single active consumer
+%% if needed, and either stops the queue (auto-delete with no consumers
+%% left) or requeues the channel's unacked messages.
+handle_ch_down(DownPid, State = #q{consumers = Consumers,
+                                   active_consumer = Holder,
+                                   single_active_consumer_on = SingleActiveConsumerOn,
+                                   senders = Senders}) ->
+    State1 = State#q{senders = case pmon:is_monitored(DownPid, Senders) of
+                                   false ->
+                                       Senders;
+                                   true ->
+    %% A rabbit_channel process died. Here credit_flow will take care
+    %% of cleaning up the rabbit_amqqueue_process process dictionary
+    %% with regards to the credit we were tracking for the channel
+    %% process. See handle_cast({deliver, Deliver}, State) in this
+    %% module. In that cast function we process deliveries from the
+    %% channel, which means we credit_flow:ack/1 said
+    %% messages. credit_flow:ack'ing messages means we are increasing
+    %% a counter to know when we need to send MoreCreditAfter. Since
+    %% the process died, the credit_flow flow module will clean up
+    %% that for us.
+                                       credit_flow:peer_down(DownPid),
+                                       pmon:demonitor(DownPid, Senders)
+                               end},
+    case rabbit_queue_consumers:erase_ch(DownPid, Consumers) of
+        not_found ->
+            {ok, State1};
+        {ChAckTags, ChCTags, Consumers1} ->
+            QName = qname(State1),
+            [emit_consumer_deleted(DownPid, CTag, QName, ?INTERNAL_USER) || CTag <- ChCTags],
+            Holder1 = new_single_active_consumer_after_channel_down(DownPid, Holder, SingleActiveConsumerOn, Consumers1),
+            State2 = State1#q{consumers = Consumers1,
+                              active_consumer = Holder1},
+            maybe_notify_consumer_updated(State2, Holder, Holder1),
+            notify_decorators(State2),
+            case should_auto_delete(State2) of
+                true ->
+                    log_auto_delete(
+                        io_lib:format(
+                            "because all of its consumers (~p) were on a channel that was closed",
+                            [length(ChCTags)]),
+                        State),
+                    {stop, State2};
+                false -> {ok, requeue_and_run(ChAckTags,
+                                              ensure_expiry_timer(State2))}
+            end
+    end.
+
+%% Computes the new active-consumer holder after DownChPid died. With
+%% single-active-consumer on, a dead holder is replaced by any other
+%% remaining consumer (or none); with it off, the holder field tracks
+%% the exclusive consumer and simply becomes none if it was on the
+%% dead channel.
+new_single_active_consumer_after_channel_down(DownChPid, CurrentSingleActiveConsumer, _SingleActiveConsumerIsOn = true, Consumers) ->
+    case CurrentSingleActiveConsumer of
+        {DownChPid, _} ->
+            % the single active consumer is on the down channel, we have to replace it
+            case rabbit_queue_consumers:get_consumer(Consumers) of
+                undefined -> none;
+                Consumer -> Consumer
+            end;
+        _ ->
+            CurrentSingleActiveConsumer
+    end;
+new_single_active_consumer_after_channel_down(DownChPid, CurrentSingleActiveConsumer, _SingleActiveConsumerIsOn = false, _Consumers) ->
+    case CurrentSingleActiveConsumer of
+        {DownChPid, _} -> none;
+        Other -> Other
+    end.
+
+%% May a new consumer attach? A held exclusive-consumer slot rejects
+%% everyone; an exclusive request on a free slot additionally requires
+%% the queue to have no consumers at all.
+check_exclusive_access(none, false, _State) ->
+    ok;
+check_exclusive_access(none, true, State) ->
+    case is_unused(State) of
+        true  -> ok;
+        false -> in_use
+    end;
+check_exclusive_access({_ChPid, _ConsumerTag}, _ExclusiveConsume, _State) ->
+    in_use.
+
+is_unused(_State) -> rabbit_queue_consumers:count() == 0.
+
+%% Send a protocol reply to the channel, unless there is none to send.
+maybe_send_reply(_ChPid, undefined) ->
+    ok;
+maybe_send_reply(ChPid, Msg) ->
+    ok = rabbit_channel:send_command(ChPid, Msg).
+
+qname(#q{q = Q}) -> amqqueue:get_name(Q).
+
+%% Let the backing queue module run its timeout processing and store
+%% the updated backing-queue state.
+backing_queue_timeout(State = #q{backing_queue = BQ,
+                                 backing_queue_state = BQS}) ->
+    State#q{backing_queue_state = BQ:timeout(BQS)}.
+
+%% Remove AckTags belonging to ChPid from the consumer bookkeeping and
+%% then apply Fun to the state. Note the asymmetry: when the channel is
+%% not found at all, Fun is NOT run and the state is returned as-is.
+%% If acking unblocked the channel we also try to deliver queued
+%% messages to consumers.
+subtract_acks(ChPid, AckTags, State = #q{consumers = Consumers}, Fun) ->
+    case rabbit_queue_consumers:subtract_acks(ChPid, AckTags, Consumers) of
+        not_found               -> State;
+        unchanged               -> Fun(State);
+        {unblocked, Consumers1} -> State1 = State#q{consumers = Consumers1},
+                                   run_message_queue(true, Fun(State1))
+    end.
+
+%% Build the #message_properties{} stored alongside a message in the
+%% backing queue: its absolute expiry, whether a confirm will be
+%% needed, and the payload size in bytes.
+message_properties(#basic_message{content = Content} = Message, Confirm,
+                   #q{ttl = TTL}) ->
+    #content{payload_fragments_rev = Fragments} = Content,
+    #message_properties{expiry           = calculate_msg_expiry(Message, TTL),
+                        needs_confirming = Confirm =:= eventually,
+                        size             = iolist_size(Fragments)}.
+
+%% Absolute expiry time (in microseconds) for a message, or 'undefined'
+%% if neither the queue TTL nor the per-message TTL is set.
+calculate_msg_expiry(#basic_message{content = Content}, TTL) ->
+    #content{properties = Props} =
+        rabbit_binary_parser:ensure_content_decoded(Content),
+    %% We assert that the expiration must be valid - we check in the channel.
+    {ok, MsgTTL} = rabbit_basic:parse_expiration(Props),
+    %% lists:min/1 exploits Erlang term order (numbers sort before
+    %% atoms), so any numeric TTL beats 'undefined' and the result is
+    %% 'undefined' only when both TTLs are unset.
+    case lists:min([TTL, MsgTTL]) of
+        undefined -> undefined;
+        T         -> os:system_time(micro_seconds) + T * 1000
+    end.
+
+%% Logically this function should invoke maybe_send_drained/2.
+%% However, that is expensive. Since some frequent callers of
+%% drop_expired_msgs/1, in particular deliver_or_enqueue/3, cannot
+%% possibly cause the queue to become empty, we push the
+%% responsibility to the callers. So be cautious when adding new ones.
+%% Cheap fast-path: an empty queue has nothing to expire, so only then
+%% do we take the current time and scan for expired messages.
+drop_expired_msgs(State) ->
+    case is_empty(State) of
+        true  -> State;
+        false -> drop_expired_msgs(os:system_time(micro_seconds),
+                                  State)
+    end.
+
+%% Drop (or dead-letter, when a DLX is configured) every message whose
+%% expiry is =< Now, then re-arm the TTL timer for the first
+%% still-unexpired message (its properties come back from
+%% dropwhile/fetchwhile as Props).
+drop_expired_msgs(Now, State = #q{backing_queue_state = BQS,
+                                  backing_queue = BQ }) ->
+    ExpirePred = fun (#message_properties{expiry = Exp}) -> Now >= Exp end,
+    {Props, State1} =
+        with_dlx(
+          State#q.dlx,
+          fun (X) -> dead_letter_expired_msgs(ExpirePred, X, State) end,
+          fun () -> {Next, BQS1} = BQ:dropwhile(ExpirePred, BQS),
+                    {Next, State#q{backing_queue_state = BQS1}} end),
+    ensure_ttl_timer(case Props of
+                         undefined                         -> undefined;
+                         #message_properties{expiry = Exp} -> Exp
+                     end, State1).
+
+%% Apply With(Exchange) when the dead-letter exchange DLX is configured
+%% and still exists; otherwise fall back to Without().
+with_dlx(undefined, _With, Without) ->
+    Without();
+with_dlx(DLX, With, Without) ->
+    case rabbit_exchange:lookup(DLX) of
+        {ok, X}            -> With(X);
+        {error, not_found} -> Without()
+    end.
+
+%% Dead-letter every message matching ExpirePred to exchange X with
+%% reason 'expired', using fetchwhile to walk the head of the queue.
+dead_letter_expired_msgs(ExpirePred, X, State = #q{backing_queue = BQ}) ->
+    dead_letter_msgs(fun (DLFun, Acc, BQS1) ->
+                             BQ:fetchwhile(ExpirePred, DLFun, Acc, BQS1)
+                     end, expired, X, State).
+
+%% Dead-letter the messages identified by AckTags to exchange X with
+%% reason 'rejected' (basic.reject/basic.nack with requeue=false).
+dead_letter_rejected_msgs(AckTags, X, State = #q{backing_queue = BQ}) ->
+    {ok, State1} =
+        dead_letter_msgs(
+          fun (DLFun, Acc, BQS) ->
+                  {Acc1, BQS1} = BQ:ackfold(DLFun, Acc, BQS, AckTags),
+                  {ok, Acc1, BQS1}
+          end, rejected, X, State),
+    State1.
+
+%% Dead-letter a single message from the head of the queue to exchange
+%% X with reason 'maxlen' (queue length/bytes limit overflow).
+dead_letter_maxlen_msg(X, State = #q{backing_queue = BQ}) ->
+    {ok, State1} =
+        dead_letter_msgs(
+          fun (DLFun, Acc, BQS) ->
+                  {{Msg, _, AckTag}, BQS1} = BQ:fetch(true, BQS),
+                  {ok, DLFun(Msg, AckTag, Acc), BQS1}
+          end, maxlen, X, State),
+    State1.
+
+%% Common dead-lettering driver: Fun walks the backing queue, publishing
+%% each message to exchange X via rabbit_dead_letter:publish/5 and
+%% collecting its ack tag; all collected tags are then acked in the
+%% backing queue so the originals are removed.
+dead_letter_msgs(Fun, Reason, X, State = #q{dlx_routing_key = RK,
+                                            backing_queue_state = BQS,
+                                            backing_queue = BQ}) ->
+    QName = qname(State),
+    {Res, Acks1, BQS1} =
+        Fun(fun (Msg, AckTag, Acks) ->
+                    rabbit_dead_letter:publish(Msg, Reason, X, RK, QName),
+                    [AckTag | Acks]
+            end, [], BQS),
+    {_Guids, BQS2} = BQ:ack(Acks1, BQS1),
+    {Res, State#q{backing_queue_state = BQS2}}.
+
+%% Terminate the gen_server2 normally, optionally sending Reply first.
+stop(State) ->
+    stop(noreply, State).
+
+stop(noreply, State) ->
+    {stop, normal, State};
+stop(Reply, State) ->
+    {stop, normal, Reply, State}.
+
+%% Fold the requested info Items into a proplist, preserving their
+%% order. 'totals' expands to the three message counters and
+%% 'type_specific' to format/1's node/mirror information; everything
+%% else goes through i/2.
+infos(Items, #q{q = Q} = State) ->
+    lists:foldr(fun(totals, Acc) ->
+                        [{messages_ready, i(messages_ready, State)},
+                         {messages, i(messages, State)},
+                         {messages_unacknowledged, i(messages_unacknowledged, State)}] ++ Acc;
+                   (type_specific, Acc) ->
+                        format(Q) ++ Acc;
+                   (Item, Acc) ->
+                        [{Item, i(Item, State)} | Acc]
+                end, [], Items).
+
+%% i/2 computes a single info item for infos/2. Clause order matters:
+%% the final catch-all delegates any unrecognised item to the backing
+%% queue module.
+i(name, #q{q = Q}) -> amqqueue:get_name(Q);
+i(durable, #q{q = Q}) -> amqqueue:is_durable(Q);
+i(auto_delete, #q{q = Q}) -> amqqueue:is_auto_delete(Q);
+i(arguments, #q{q = Q}) -> amqqueue:get_arguments(Q);
+i(pid, _) ->
+    self();
+i(owner_pid, #q{q = Q}) when ?amqqueue_exclusive_owner_is(Q, none) ->
+    '';
+i(owner_pid, #q{q = Q}) ->
+    amqqueue:get_exclusive_owner(Q);
+i(exclusive, #q{q = Q}) ->
+    ExclusiveOwner = amqqueue:get_exclusive_owner(Q),
+    is_pid(ExclusiveOwner);
+i(policy, #q{q = Q}) ->
+    case rabbit_policy:name(Q) of
+        none   -> '';
+        Policy -> Policy
+    end;
+i(operator_policy, #q{q = Q}) ->
+    case rabbit_policy:name_op(Q) of
+        none   -> '';
+        Policy -> Policy
+    end;
+i(effective_policy_definition, #q{q = Q}) ->
+    case rabbit_policy:effective_definition(Q) of
+        undefined -> [];
+        Def       -> Def
+    end;
+i(exclusive_consumer_pid, #q{active_consumer = {ChPid, _ConsumerTag}, single_active_consumer_on = false}) ->
+    ChPid;
+i(exclusive_consumer_pid, _) ->
+    '';
+i(exclusive_consumer_tag, #q{active_consumer = {_ChPid, ConsumerTag}, single_active_consumer_on = false}) ->
+    ConsumerTag;
+i(exclusive_consumer_tag, _) ->
+    '';
+i(single_active_consumer_pid, #q{active_consumer = {ChPid, _Consumer}, single_active_consumer_on = true}) ->
+    ChPid;
+i(single_active_consumer_pid, _) ->
+    '';
+i(single_active_consumer_tag, #q{active_consumer = {_ChPid, Consumer}, single_active_consumer_on = true}) ->
+    rabbit_queue_consumers:consumer_tag(Consumer);
+i(single_active_consumer_tag, _) ->
+    '';
+i(messages_ready, #q{backing_queue_state = BQS, backing_queue = BQ}) ->
+    BQ:len(BQS);
+i(messages_unacknowledged, _) ->
+    rabbit_queue_consumers:unacknowledged_message_count();
+i(messages, State) ->
+    lists:sum([i(Item, State) || Item <- [messages_ready,
+                                          messages_unacknowledged]]);
+i(consumers, _) ->
+    rabbit_queue_consumers:count();
+i(consumer_utilisation, #q{consumers = Consumers}) ->
+    case rabbit_queue_consumers:count() of
+        0 -> '';
+        _ -> rabbit_queue_consumers:utilisation(Consumers)
+    end;
+i(memory, _) ->
+    {memory, M} = process_info(self(), memory),
+    M;
+%% Mirror information is read back from Mnesia rather than from our
+%% cached copy of the amqqueue record, which may not track mirror pids.
+i(slave_pids, #q{q = Q0}) ->
+    Name = amqqueue:get_name(Q0),
+    {ok, Q} = rabbit_amqqueue:lookup(Name),
+    case rabbit_mirror_queue_misc:is_mirrored(Q) of
+        false -> '';
+        true  -> amqqueue:get_slave_pids(Q)
+    end;
+i(synchronised_slave_pids, #q{q = Q0}) ->
+    Name = amqqueue:get_name(Q0),
+    {ok, Q} = rabbit_amqqueue:lookup(Name),
+    case rabbit_mirror_queue_misc:is_mirrored(Q) of
+        false -> '';
+        true  -> amqqueue:get_sync_slave_pids(Q)
+    end;
+i(recoverable_slaves, #q{q = Q0}) ->
+    Name = amqqueue:get_name(Q0),
+    Durable = amqqueue:is_durable(Q0),
+    {ok, Q} = rabbit_amqqueue:lookup(Name),
+    case Durable andalso rabbit_mirror_queue_misc:is_mirrored(Q) of
+        false -> '';
+        true  -> amqqueue:get_recoverable_slaves(Q)
+    end;
+i(state, #q{status = running}) -> credit_flow:state();
+i(state, #q{status = State})   -> State;
+i(garbage_collection, _State) ->
+    rabbit_misc:get_gc_info(self());
+i(reductions, _State) ->
+    {reductions, Reductions} = erlang:process_info(self(), reductions),
+    Reductions;
+i(user_who_performed_action, #q{q = Q}) ->
+    Opts = amqqueue:get_options(Q),
+    maps:get(user, Opts, ?UNKNOWN_USER);
+i(type, _) -> classic;
+i(Item, #q{backing_queue_state = BQS, backing_queue = BQ}) ->
+    BQ:info(Item, BQS).
+
+%% Publish queue metrics to rabbit_core_metrics and rabbit_event.
+%% Extra overrides any item with the same key.
+emit_stats(State) ->
+    emit_stats(State, []).
+
+emit_stats(State, Extra) ->
+    ExtraKs = [K || {K, _} <- Extra],
+    %% NOTE: the head of this match relies on statistics_keys()
+    %% starting with exactly these five items, in this order -- keep
+    %% them in sync.
+    [{messages_ready, MR}, {messages_unacknowledged, MU}, {messages, M},
+     {reductions, R}, {name, Name} | Infos] = All
+        = [{K, V} || {K, V} <- infos(statistics_keys(), State),
+                     not lists:member(K, ExtraKs)],
+    rabbit_core_metrics:queue_stats(Name, Extra ++ Infos),
+    rabbit_core_metrics:queue_stats(Name, MR, MU, M, R),
+    rabbit_event:notify(queue_stats, Extra ++ All).
+
+%% Emit a consumer_created event for the management/event plugins.
+%% Ref scopes the notification (used by force_event_refresh).
+emit_consumer_created(ChPid, CTag, Exclusive, AckRequired, QName,
+                      PrefetchCount, Args, Ref, ActingUser) ->
+    rabbit_event:notify(consumer_created,
+                        [{consumer_tag,   CTag},
+                         {exclusive,      Exclusive},
+                         {ack_required,   AckRequired},
+                         {channel,        ChPid},
+                         {queue,          QName},
+                         {prefetch_count, PrefetchCount},
+                         {arguments,      Args},
+                         {user_who_performed_action, ActingUser}],
+                        Ref).
+
+%% Remove the consumer from core metrics and emit a consumer_deleted
+%% event.
+emit_consumer_deleted(ChPid, ConsumerTag, QName, ActingUser) ->
+    rabbit_core_metrics:consumer_deleted(ChPid, ConsumerTag, QName),
+    rabbit_event:notify(consumer_deleted,
+                        [{consumer_tag, ConsumerTag},
+                         {channel,      ChPid},
+                         {queue,        QName},
+                         {user_who_performed_action, ActingUser}]).
+
+%%----------------------------------------------------------------------------
+
+%% gen_server2 priority for synchronous calls: cheap queries outrank
+%% everything; consume/cancel are boosted when consumers are falling
+%% behind (see consumer_bias/3).
+prioritise_call(info,                                          _From, _Len, _State) -> 9;
+prioritise_call({info, _Items},                                _From, _Len, _State) -> 9;
+prioritise_call(consumers,                                     _From, _Len, _State) -> 9;
+prioritise_call(stat,                                          _From, _Len, _State) -> 7;
+prioritise_call({basic_consume, _, _, _, _, _, _, _, _, _, _}, _From, _Len, State)  -> consumer_bias(State, 0, 2);
+prioritise_call({basic_cancel, _, _, _},                       _From, _Len, State)  -> consumer_bias(State, 0, 2);
+prioritise_call(_Msg,                                          _From, _Len, _State) -> 0.
+
+%% gen_server2 priority for casts; see note [1] below for the
+%% reasoning behind the ack/resume/notify_sent ordering.
+prioritise_cast(delete_immediately,                   _Len, _State) -> 8;
+prioritise_cast({delete_exclusive, _Pid},             _Len, _State) -> 8;
+prioritise_cast({set_ram_duration_target, _Duration}, _Len, _State) -> 8;
+prioritise_cast({set_maximum_since_use, _Age},        _Len, _State) -> 8;
+prioritise_cast({run_backing_queue, _Mod, _Fun},      _Len, _State) -> 6;
+prioritise_cast({ack, _AckTags, _ChPid},              _Len, _State) -> 4; %% [1]
+prioritise_cast({resume, _ChPid},                     _Len, _State) -> 3;
+prioritise_cast({notify_sent, _ChPid, _Credit},       _Len, State)  -> consumer_bias(State, 0, 2);
+prioritise_cast(_Msg,                                 _Len, _State) -> 0.
+
+%% [1] It should be safe to always prioritise ack / resume since they
+%% will be rate limited by how fast consumers receive messages -
+%% i.e. by notify_sent. We prioritise ack and resume to discourage
+%% starvation caused by prioritising notify_sent. We don't vary their
+%% priority since acks should stay in order (some parts of the queue
+%% stack are optimised for that) and to make things easier to reason
+%% about. Finally, we prioritise ack over resume since it should
+%% always reduce memory use.
+%% bump_reduce_memory_use is prioritised over publishes, because sending
+%% credit to self is hard to reason about. Consumers can continue while
+%% reduce_memory_use is in progress.
+
+%% Return High when consumers are draining slower than publishers are
+%% filling (egress/ingress below ?CONSUMER_BIAS_RATIO), otherwise Low.
+%% The first clause guards the division against a zero ingress rate.
+consumer_bias(#q{backing_queue = BQ, backing_queue_state = BQS}, Low, High) ->
+    case BQ:msg_rates(BQS) of
+        {0.0, _} -> Low;
+        {Ingress, Egress} when Egress / Ingress < ?CONSUMER_BIAS_RATIO -> High;
+        {_, _} -> Low
+    end.
+
+%% gen_server2 priority for bare messages. DownPid is bound first so
+%% the 'DOWN' clause only boosts the exclusive owner's monitor firing.
+prioritise_info(Msg, _Len, #q{q = Q}) ->
+    DownPid = amqqueue:get_exclusive_owner(Q),
+    case Msg of
+        {'DOWN', _, process, DownPid, _} -> 8;
+        update_ram_duration              -> 8;
+        {maybe_expire, _Version}         -> 8;
+        {drop_expired, _Version}         -> 8;
+        emit_stats                       -> 7;
+        sync_timeout                     -> 6;
+        bump_reduce_memory_use           -> 1;
+        _                                -> 0
+    end.
+
+%% gen_server2 synchronous call handler.
+handle_call({init, Recover}, From, State) ->
+    try
+        init_it(Recover, From, State)
+    catch
+        {coordinator_not_started, Reason} ->
+            %% The GM can shutdown before the coordinator has started up
+            %% (lost membership or missing group), thus the start_link of
+            %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process
+            %% is trapping exits. The master captures this return value and
+            %% throws the current exception.
+            {stop, Reason, State}
+    end;
+
+handle_call(info, _From, State) ->
+    reply({ok, infos(info_keys(), State)}, State);
+
+handle_call({info, Items}, _From, State) ->
+    try
+        reply({ok, infos(Items, State)}, State)
+    catch Error -> reply({error, Error}, State)
+    end;
+
+handle_call(consumers, _From, State = #q{consumers = Consumers, single_active_consumer_on = false}) ->
+    reply(rabbit_queue_consumers:all(Consumers), State);
+handle_call(consumers, _From, State = #q{consumers = Consumers, active_consumer = ActiveConsumer}) ->
+    reply(rabbit_queue_consumers:all(Consumers, ActiveConsumer, true), State);
+
+handle_call({notify_down, ChPid}, _From, State) ->
+    %% we want to do this synchronously, so that auto_deleted queues
+    %% are no longer visible by the time we send a response to the
+    %% client. The queue is ultimately deleted in terminate/2; if we
+    %% return stop with a reply, terminate/2 will be called by
+    %% gen_server2 *before* the reply is sent.
+    case handle_ch_down(ChPid, State) of
+        {ok, State1} -> reply(ok, State1);
+        {stop, State1} -> stop(ok, State1#q{status = {terminated_by, auto_delete}})
+    end;
+
+%% basic.get: fetch a single message, recording a pending ack unless
+%% the client asked for no-ack.
+handle_call({basic_get, ChPid, NoAck, LimiterPid}, _From,
+            State = #q{q = Q}) ->
+    QName = amqqueue:get_name(Q),
+    AckRequired = not NoAck,
+    State1 = ensure_expiry_timer(State),
+    case fetch(AckRequired, State1) of
+        {empty, State2} ->
+            reply(empty, State2);
+        {{Message, IsDelivered, AckTag},
+         #q{backing_queue = BQ, backing_queue_state = BQS} = State2} ->
+            case AckRequired of
+                true  -> ok = rabbit_queue_consumers:record_ack(
+                                ChPid, LimiterPid, AckTag);
+                false -> ok
+            end,
+            Msg = {QName, self(), AckTag, IsDelivered, Message},
+            reply({ok, BQ:len(BQS), Msg}, State2)
+    end;
+
+%% basic.consume: register a consumer. With single-active-consumer the
+%% first consumer becomes the holder; exclusive consume is incompatible
+%% with SAC and rejected outright.
+handle_call({basic_consume, NoAck, ChPid, LimiterPid, LimiterActive,
+             PrefetchCount, ConsumerTag, ExclusiveConsume, Args, OkMsg, ActingUser},
+            _From, State = #q{consumers = Consumers,
+                              active_consumer = Holder,
+                              single_active_consumer_on = SingleActiveConsumerOn}) ->
+    ConsumerRegistration = case SingleActiveConsumerOn of
+        true ->
+            case ExclusiveConsume of
+                true  ->
+                    {error, reply({error, exclusive_consume_unavailable}, State)};
+                false ->
+                    Consumers1 = rabbit_queue_consumers:add(
+                                   ChPid, ConsumerTag, NoAck,
+                                   LimiterPid, LimiterActive,
+                                   PrefetchCount, Args, is_empty(State),
+                                   ActingUser, Consumers),
+
+                    case Holder of
+                        none ->
+                            NewConsumer = rabbit_queue_consumers:get(ChPid, ConsumerTag, Consumers1),
+                            {state, State#q{consumers          = Consumers1,
+                                            has_had_consumers  = true,
+                                            active_consumer    = NewConsumer}};
+                        _    ->
+                            {state, State#q{consumers         = Consumers1,
+                                            has_had_consumers = true}}
+                    end
+            end;
+        false ->
+            case check_exclusive_access(Holder, ExclusiveConsume, State) of
+                in_use -> {error, reply({error, exclusive_consume_unavailable}, State)};
+                ok     ->
+                    Consumers1 = rabbit_queue_consumers:add(
+                                   ChPid, ConsumerTag, NoAck,
+                                   LimiterPid, LimiterActive,
+                                   PrefetchCount, Args, is_empty(State),
+                                   ActingUser, Consumers),
+                    ExclusiveConsumer =
+                        if ExclusiveConsume -> {ChPid, ConsumerTag};
+                           true             -> Holder
+                        end,
+                    {state, State#q{consumers          = Consumers1,
+                                    has_had_consumers  = true,
+                                    active_consumer    = ExclusiveConsumer}}
+            end
+    end,
+    case ConsumerRegistration of
+        {error, Reply} ->
+            Reply;
+        {state, State1} ->
+            ok = maybe_send_reply(ChPid, OkMsg),
+            QName = qname(State1),
+            AckRequired = not NoAck,
+            TheConsumer = rabbit_queue_consumers:get(ChPid, ConsumerTag, State1#q.consumers),
+            {ConsumerIsActive, ActivityStatus} =
+                case {SingleActiveConsumerOn, State1#q.active_consumer} of
+                    {true, TheConsumer} ->
+                        {true, single_active};
+                    {true, _} ->
+                        {false, waiting};
+                    {false, _} ->
+                        {true, up}
+                end,
+            rabbit_core_metrics:consumer_created(
+                ChPid, ConsumerTag, ExclusiveConsume, AckRequired, QName,
+                PrefetchCount, ConsumerIsActive, ActivityStatus, Args),
+            emit_consumer_created(ChPid, ConsumerTag, ExclusiveConsume,
+                                  AckRequired, QName, PrefetchCount,
+                                  Args, none, ActingUser),
+            notify_decorators(State1),
+            reply(ok, run_message_queue(State1))
+    end;
+
+handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg, ActingUser}, _From,
+            State = #q{consumers = Consumers,
+                       active_consumer = Holder,
+                       single_active_consumer_on = SingleActiveConsumerOn }) ->
+    ok = maybe_send_reply(ChPid, OkMsg),
+    case rabbit_queue_consumers:remove(ChPid, ConsumerTag, Consumers) of
+        not_found ->
+            reply(ok, State);
+        Consumers1 ->
+            Holder1 = new_single_active_consumer_after_basic_cancel(ChPid, ConsumerTag,
+                Holder, SingleActiveConsumerOn, Consumers1
+            ),
+            State1 = State#q{consumers = Consumers1,
+                             active_consumer = Holder1},
+            maybe_notify_consumer_updated(State1, Holder, Holder1),
+            emit_consumer_deleted(ChPid, ConsumerTag, qname(State1), ActingUser),
+            notify_decorators(State1),
+            case should_auto_delete(State1) of
+                false -> reply(ok, ensure_expiry_timer(State1));
+                true  ->
+                    log_auto_delete(
+                        io_lib:format(
+                            "because its last consumer with tag '~s' was cancelled",
+                            [ConsumerTag]),
+                        State),
+                    stop(ok, State1)
+            end
+    end;
+
+handle_call(stat, _From, State) ->
+    State1 = #q{backing_queue = BQ, backing_queue_state = BQS} =
+        ensure_expiry_timer(State),
+    reply({ok, BQ:len(BQS), rabbit_queue_consumers:count()}, State1);
+
+%% queue.delete: IfUnused/IfEmpty mirror the protocol's if-unused and
+%% if-empty flags.
+handle_call({delete, IfUnused, IfEmpty, ActingUser}, _From,
+            State = #q{backing_queue_state = BQS, backing_queue = BQ}) ->
+    IsEmpty  = BQ:is_empty(BQS),
+    IsUnused = is_unused(State),
+    if
+        IfEmpty  and not(IsEmpty)  -> reply({error, not_empty}, State);
+        IfUnused and not(IsUnused) -> reply({error,    in_use}, State);
+        true                       -> stop({ok, BQ:len(BQS)},
+                                           State#q{status = {terminated_by, ActingUser}})
+    end;
+
+handle_call(purge, _From, State = #q{backing_queue       = BQ,
+                                     backing_queue_state = BQS}) ->
+    {Count, BQS1} = BQ:purge(BQS),
+    State1 = State#q{backing_queue_state = BQS1},
+    reply({ok, Count}, maybe_send_drained(Count =:= 0, State1));
+
+%% Reply before requeueing so the channel is not blocked on the
+%% (potentially slow) requeue work.
+handle_call({requeue, AckTags, ChPid}, From, State) ->
+    gen_server2:reply(From, ok),
+    noreply(requeue(AckTags, ChPid, State));
+
+handle_call(sync_mirrors, _From,
+            State = #q{backing_queue       = rabbit_mirror_queue_master,
+                       backing_queue_state = BQS}) ->
+    S = fun(BQSN) -> State#q{backing_queue_state = BQSN} end,
+    HandleInfo = fun (Status) ->
+                         receive {'$gen_call', From, {info, Items}} ->
+                                 Infos = infos(Items, State#q{status = Status}),
+                                 gen_server2:reply(From, {ok, Infos})
+                         after 0 ->
+                                 ok
+                         end
+                 end,
+    EmitStats = fun (Status) ->
+                        rabbit_event:if_enabled(
+                          State, #q.stats_timer,
+                          fun() -> emit_stats(State#q{status = Status}) end)
+                end,
+    case rabbit_mirror_queue_master:sync_mirrors(HandleInfo, EmitStats, BQS) of
+        {ok, BQS1}           -> reply(ok, S(BQS1));
+        {stop, Reason, BQS1} -> {stop, Reason, S(BQS1)}
+    end;
+
+handle_call(sync_mirrors, _From, State) ->
+    reply({error, not_mirrored}, State);
+
+%% By definition if we get this message here we do not have to do anything.
+handle_call(cancel_sync_mirrors, _From, State) ->
+    reply({ok, not_syncing}, State).
+
+%% Choose the queue's active consumer after {ChPid, ConsumerTag} has
+%% been cancelled. With SAC on, a cancelled holder is replaced by any
+%% remaining consumer; with SAC off the exclusive-consumer slot just
+%% empties when its holder cancels.
+new_single_active_consumer_after_basic_cancel(ChPid, ConsumerTag, CurrentSingleActiveConsumer,
+                                              true, Consumers) ->
+    case rabbit_queue_consumers:is_same(ChPid, ConsumerTag, CurrentSingleActiveConsumer) of
+        true ->
+            %% the cancelled consumer was the active one: promote another
+            case rabbit_queue_consumers:get_consumer(Consumers) of
+                undefined -> none;
+                Consumer  -> Consumer
+            end;
+        false ->
+            CurrentSingleActiveConsumer
+    end;
+new_single_active_consumer_after_basic_cancel(ChPid, ConsumerTag, {ChPid, ConsumerTag},
+                                              false, _Consumers) ->
+    none;
+new_single_active_consumer_after_basic_cancel(_ChPid, _ConsumerTag, CurrentSingleActiveConsumer,
+                                              false, _Consumers) ->
+    CurrentSingleActiveConsumer.
+
+%% Report a newly promoted single active consumer to core metrics.
+%% No-op when SAC is off or when the holder did not actually change.
+maybe_notify_consumer_updated(#q{single_active_consumer_on = false}, _Previous, _New) ->
+    ok;
+maybe_notify_consumer_updated(#q{single_active_consumer_on = true}, Unchanged, Unchanged) ->
+    %% same holder before and after: nothing to report
+    ok;
+maybe_notify_consumer_updated(#q{single_active_consumer_on = true} = State, _Previous,
+                              {ChPid, Consumer}) ->
+    {Tag, Ack, Prefetch, Args} = rabbit_queue_consumers:get_infos(Consumer),
+    rabbit_core_metrics:consumer_updated(
+      ChPid, Tag, false, Ack, qname(State),
+      Prefetch, true, single_active, Args),
+    ok;
+maybe_notify_consumer_updated(#q{single_active_consumer_on = true}, _Previous, _New) ->
+    ok.
+
+%% gen_server2 asynchronous cast handler.
+handle_cast(init, State) ->
+    try
+        init_it({no_barrier, non_clean_shutdown}, none, State)
+    catch
+        {coordinator_not_started, Reason} ->
+            %% The GM can shutdown before the coordinator has started up
+            %% (lost membership or missing group), thus the start_link of
+            %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process
+            %% is trapping exits. The master captures this return value and
+            %% throws the current exception.
+            {stop, Reason, State}
+    end;
+
+handle_cast({run_backing_queue, Mod, Fun},
+            State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
+    noreply(State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)});
+
+handle_cast({deliver,
+                Delivery = #delivery{sender = Sender,
+                                     flow   = Flow},
+                SlaveWhenPublished},
+            State = #q{senders = Senders}) ->
+    Senders1 = case Flow of
+    %% With both credit_flow:ack/1 calls below we are acking messages
+    %% to the channel process that sent us the message delivery. See
+    %% handle_ch_down for more info.
+                   flow   -> credit_flow:ack(Sender),
+                             case SlaveWhenPublished of
+                                 true  -> credit_flow:ack(Sender); %% [0]
+                                 false -> ok
+                             end,
+                             pmon:monitor(Sender, Senders);
+                   noflow -> Senders
+               end,
+    State1 = State#q{senders = Senders1},
+    noreply(maybe_deliver_or_enqueue(Delivery, SlaveWhenPublished, State1));
+%% [0] The second ack is since the channel thought we were a mirror at
+%% the time it published this message, so it used two credits (see
+%% rabbit_queue_type:deliver/2).
+
+handle_cast({ack, AckTags, ChPid}, State) ->
+    noreply(ack(AckTags, ChPid, State));
+
+%% basic.reject/nack with requeue=true
+handle_cast({reject, true,  AckTags, ChPid}, State) ->
+    noreply(requeue(AckTags, ChPid, State));
+
+%% basic.reject/nack with requeue=false: dead-letter when a DLX is
+%% configured, otherwise just discard via ack.
+handle_cast({reject, false, AckTags, ChPid}, State) ->
+    noreply(with_dlx(
+              State#q.dlx,
+              fun (X) -> subtract_acks(ChPid, AckTags, State,
+                                       fun (State1) ->
+                                               dead_letter_rejected_msgs(
+                                                 AckTags, X, State1)
+                                       end) end,
+              fun () -> ack(AckTags, ChPid, State) end));
+
+handle_cast({delete_exclusive, ConnPid}, State) ->
+    log_delete_exclusive(ConnPid, State),
+    stop(State);
+
+handle_cast(delete_immediately, State) ->
+    stop(State);
+
+handle_cast({resume, ChPid}, State) ->
+    noreply(possibly_unblock(rabbit_queue_consumers:resume_fun(),
+                             ChPid, State));
+
+handle_cast({notify_sent, ChPid, Credit}, State) ->
+    noreply(possibly_unblock(rabbit_queue_consumers:notify_sent_fun(Credit),
+                             ChPid, State));
+
+handle_cast({activate_limit, ChPid}, State) ->
+    noreply(possibly_unblock(rabbit_queue_consumers:activate_limit_fun(),
+                             ChPid, State));
+
+handle_cast({set_ram_duration_target, Duration},
+            State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
+    BQS1 = BQ:set_ram_duration_target(Duration, BQS),
+    noreply(State#q{backing_queue_state = BQS1});
+
+handle_cast({set_maximum_since_use, Age}, State) ->
+    ok = file_handle_cache:set_maximum_since_use(Age),
+    noreply(State);
+
+handle_cast(update_mirroring, State = #q{q = Q,
+                                         mirroring_policy_version = Version}) ->
+    case needs_update_mirroring(Q, Version) of
+        false ->
+            noreply(State);
+        {Policy, NewVersion} ->
+            State1 = State#q{mirroring_policy_version = NewVersion},
+            noreply(update_mirroring(Policy, State1))
+    end;
+
+handle_cast({credit, ChPid, CTag, Credit, Drain},
+            State = #q{consumers = Consumers,
+                       backing_queue = BQ,
+                       backing_queue_state = BQS,
+                       q = Q}) ->
+    Len = BQ:len(BQS),
+    rabbit_classic_queue:send_queue_event(ChPid, amqqueue:get_name(Q), {send_credit_reply, Len}),
+    noreply(
+      case rabbit_queue_consumers:credit(Len == 0, Credit, Drain, ChPid, CTag,
+                                         Consumers) of
+          unchanged               -> State;
+          {unblocked, Consumers1} -> State1 = State#q{consumers = Consumers1},
+                                     run_message_queue(true, State1)
+      end);
+
+% Note: https://www.pivotaltracker.com/story/show/166962656
+% This event is necessary for the stats timer to be initialized with
+% the correct values once the management agent has started
+handle_cast({force_event_refresh, Ref},
+            State = #q{consumers = Consumers,
+                       active_consumer = Holder}) ->
+    rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State), Ref),
+    QName = qname(State),
+    AllConsumers = rabbit_queue_consumers:all(Consumers),
+    case Holder of
+        none ->
+            [emit_consumer_created(
+               Ch, CTag, false, AckRequired, QName, Prefetch,
+               Args, Ref, ActingUser) ||
+                {Ch, CTag, AckRequired, Prefetch, _, _, Args, ActingUser}
+                    <- AllConsumers];
+        {Ch, CTag} ->
+            [{Ch, CTag, AckRequired, Prefetch, _, _, Args, ActingUser}] = AllConsumers,
+            emit_consumer_created(
+              Ch, CTag, true, AckRequired, QName, Prefetch, Args, Ref, ActingUser)
+    end,
+    noreply(rabbit_event:init_stats_timer(State, #q.stats_timer));
+
+handle_cast(notify_decorators, State) ->
+    notify_decorators(State),
+    noreply(State);
+
+handle_cast(policy_changed, State = #q{q = Q0}) ->
+    Name = amqqueue:get_name(Q0),
+    %% We depend on the #q.q field being up to date at least WRT
+    %% policy (but not mirror pids) in various places, so when it
+    %% changes we go and read it from Mnesia again.
+    %%
+    %% This also has the side effect of waking us up so we emit a
+    %% stats event - so event consumers see the changed policy.
+    {ok, Q} = rabbit_amqqueue:lookup(Name),
+    noreply(process_args_policy(State#q{q = Q}));
+
+handle_cast({sync_start, _, _}, State = #q{q = Q}) ->
+    Name = amqqueue:get_name(Q),
+    %% Only a mirror should receive this, it means we are a duplicated master
+    rabbit_mirror_queue_misc:log_warning(
+      Name, "Stopping after receiving sync_start from another master", []),
+    stop(State).
+
+%% gen_server2 handler for bare messages (timers, monitors, credit).
+%% Timer messages carry the args_policy_version they were armed under;
+%% stale versions are ignored.
+handle_info({maybe_expire, Vsn}, State = #q{args_policy_version = Vsn}) ->
+    case is_unused(State) of
+        true  -> stop(State);
+        false -> noreply(State#q{expiry_timer_ref = undefined})
+    end;
+
+handle_info({maybe_expire, _Vsn}, State) ->
+    noreply(State);
+
+handle_info({drop_expired, Vsn}, State = #q{args_policy_version = Vsn}) ->
+    WasEmpty = is_empty(State),
+    State1 = drop_expired_msgs(State#q{ttl_timer_ref = undefined}),
+    noreply(maybe_send_drained(WasEmpty, State1));
+
+handle_info({drop_expired, _Vsn}, State) ->
+    noreply(State);
+
+handle_info(emit_stats, State) ->
+    emit_stats(State),
+    %% Don't call noreply/1, we don't want to set timers
+    {State1, Timeout} = next_state(rabbit_event:reset_stats_timer(
+                                     State, #q.stats_timer)),
+    {noreply, State1, Timeout};
+
+handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason},
+            State = #q{q = Q}) when ?amqqueue_exclusive_owner_is(Q, DownPid) ->
+    %% Exclusively owned queues must disappear with their owner.  In
+    %% the case of clean shutdown we delete the queue synchronously in
+    %% the reader - although not required by the spec this seems to
+    %% match what people expect (see bug 21824). However we need this
+    %% monitor-and-async- delete in case the connection goes away
+    %% unexpectedly.
+    log_delete_exclusive(DownPid, State),
+    stop(State);
+
+handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) ->
+    case handle_ch_down(DownPid, State) of
+        {ok, State1}   -> noreply(State1);
+        {stop, State1} -> stop(State1)
+    end;
+
+handle_info(update_ram_duration, State = #q{backing_queue = BQ,
+                                            backing_queue_state = BQS}) ->
+    {RamDuration, BQS1} = BQ:ram_duration(BQS),
+    DesiredDuration =
+        rabbit_memory_monitor:report_ram_duration(self(), RamDuration),
+    BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1),
+    %% Don't call noreply/1, we don't want to set timers
+    {State1, Timeout} = next_state(State#q{rate_timer_ref = undefined,
+                                           backing_queue_state = BQS2}),
+    {noreply, State1, Timeout};
+
+handle_info(sync_timeout, State) ->
+    noreply(backing_queue_timeout(State#q{sync_timer_ref = undefined}));
+
+handle_info(timeout, State) ->
+    noreply(backing_queue_timeout(State));
+
+handle_info({'EXIT', _Pid, Reason}, State) ->
+    {stop, Reason, State};
+
+handle_info({bump_credit, Msg}, State = #q{backing_queue = BQ,
+                                           backing_queue_state = BQS}) ->
+    %% The message_store is granting us more credit. This means the
+    %% backing queue (for the rabbit_variable_queue case) might
+    %% continue paging messages to disk if it still needs to. We
+    %% consume credits from the message_store whenever we need to
+    %% persist a message to disk. See:
+    %% rabbit_variable_queue:msg_store_write/4.
+    credit_flow:handle_bump_msg(Msg),
+    noreply(State#q{backing_queue_state = BQ:resume(BQS)});
+handle_info(bump_reduce_memory_use, State = #q{backing_queue       = BQ,
+                                               backing_queue_state = BQS0}) ->
+    BQS1 = BQ:handle_info(bump_reduce_memory_use, BQS0),
+    noreply(State#q{backing_queue_state = BQ:resume(BQS1)});
+
+%% Anything else is a programming error: crash with a telling reason.
+handle_info(Info, State) ->
+    {stop, {unhandled_info, Info}, State}.
+
+%% Before hibernating: let the backing queue settle (ram duration,
+%% pre-hibernate hook), emit a final stats sample marked idle, and
+%% stop the stats and rate timers.
+handle_pre_hibernate(State = #q{backing_queue_state = undefined}) ->
+    {hibernate, State};
+handle_pre_hibernate(State = #q{backing_queue = BQ,
+                                backing_queue_state = BQS}) ->
+    {RamDuration, BQS1} = BQ:ram_duration(BQS),
+    DesiredDuration =
+        rabbit_memory_monitor:report_ram_duration(self(), RamDuration),
+    BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1),
+    BQS3 = BQ:handle_pre_hibernate(BQS2),
+    rabbit_event:if_enabled(
+      State, #q.stats_timer,
+      fun () -> emit_stats(State,
+                           [{idle_since,
+                             os:system_time(milli_seconds)},
+                            {consumer_utilisation, ''}])
+                end),
+    State1 = rabbit_event:stop_stats_timer(State#q{backing_queue_state = BQS3},
+                                           #q.stats_timer),
+    {hibernate, stop_rate_timer(State1)}.
+
+format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).
+
+%% 'type_specific' info items: the node this queue lives on and, for
+%% mirrored queues, where its mirrors (synchronised or not) live.
+format(Q) when ?is_amqqueue(Q) ->
+    case rabbit_mirror_queue_misc:is_mirrored(Q) of
+        true ->
+            SPids  = amqqueue:get_slave_pids(Q),
+            SSPids = amqqueue:get_sync_slave_pids(Q),
+            [{slave_nodes,              [node(P) || P <- SPids]},
+             {synchronised_slave_nodes, [node(P) || P <- SSPids]},
+             {node, node(amqqueue:get_pid(Q))}];
+        false ->
+            [{node, node(amqqueue:get_pid(Q))}]
+    end.
+
+%% Classic queues accept every policy; specific keys are interpreted
+%% (or ignored) elsewhere.
+-spec is_policy_applicable(amqqueue:amqqueue(), any()) -> boolean().
+is_policy_applicable(_Q, _Policy) ->
+    true.
+
+%% Debug-log that an exclusive queue is being removed because its
+%% declaring connection went away. Accepts either a {Pid, Ref} pair or
+%% a bare connection pid.
+log_delete_exclusive({ConPid, _ConRef}, State) ->
+    log_delete_exclusive(ConPid, State);
+log_delete_exclusive(ConPid, #q{q = Q}) ->
+    #resource{name = QName, virtual_host = VHost} = amqqueue:get_name(Q),
+    rabbit_log_queue:debug("Deleting exclusive queue '~s' in vhost '~s' " ++
+                           "because its declaring connection ~p was closed",
+                           [QName, VHost, ConPid]).
+
+%% Debug-log an auto-delete removal. Reason is chardata appended to
+%% the format string at runtime.
+log_auto_delete(Reason, #q{q = Q}) ->
+    #resource{name = QName, virtual_host = VHost} = amqqueue:get_name(Q),
+    rabbit_log_queue:debug("Deleting auto-delete queue '~s' in vhost '~s' " ++
+                           Reason,
+                           [QName, VHost]).
+
+%% Compare our cached mirroring-policy version against the database
+%% copy; when the database is newer, return the current ha-mode policy
+%% together with the new version, otherwise false.
+needs_update_mirroring(Q, Version) ->
+    {ok, UpQ} = rabbit_amqqueue:lookup(amqqueue:get_name(Q)),
+    DBVersion = amqqueue:get_policy_version(UpQ),
+    case DBVersion > Version of
+        true -> {rabbit_policy:get(<<"ha-mode">>, UpQ), DBVersion};
+        false -> false
+    end.
+
+
+%% Apply a changed ha-mode policy by switching the backing queue
+%% module in or out of rabbit_mirror_queue_master as needed.
+update_mirroring(Policy, #q{backing_queue = BQ} = State) ->
+    case update_to(Policy, BQ) of
+        start_mirroring -> start_mirroring(State);
+        stop_mirroring  -> stop_mirroring(State);
+        update_ha_mode  -> update_ha_mode(State);
+        ignore          -> State
+    end.
+
+%% Decide what a policy change means for the backing queue module:
+%% 'undefined' policy means the queue should not be mirrored. The
+%% clause order makes the =/= guards of the original redundant.
+update_to(undefined, rabbit_mirror_queue_master) -> stop_mirroring;
+update_to(_Policy,   rabbit_mirror_queue_master) -> update_ha_mode;
+update_to(undefined, _BQ)                        -> ignore;
+update_to(_Policy,   _BQ)                        -> start_mirroring.
+
+%% Wrap the current backing queue in rabbit_mirror_queue_master,
+%% preserving its state via init_with_existing_bq.
+start_mirroring(State = #q{backing_queue       = BQ,
+                           backing_queue_state = BQS}) ->
+    %% lookup again to get policy for init_with_existing_bq
+    {ok, Q} = rabbit_amqqueue:lookup(qname(State)),
+    true = BQ =/= rabbit_mirror_queue_master, %% assertion
+    BQ1 = rabbit_mirror_queue_master,
+    BQS1 = BQ1:init_with_existing_bq(Q, BQ, BQS),
+    State#q{backing_queue       = BQ1,
+            backing_queue_state = BQS1}.
+
+%% Unwrap rabbit_mirror_queue_master, restoring the inner backing
+%% queue module and state.
+stop_mirroring(State = #q{backing_queue       = BQ,
+                          backing_queue_state = BQS}) ->
+    BQ = rabbit_mirror_queue_master, %% assertion
+    {BQ1, BQS1} = BQ:stop_mirroring(BQS),
+    State#q{backing_queue       = BQ1,
+            backing_queue_state = BQS1}.
+
+%% The ha-mode changed while the queue stays mirrored: let the mirror
+%% machinery add/remove mirrors accordingly.
+update_ha_mode(State) ->
+    {ok, Q} = rabbit_amqqueue:lookup(qname(State)),
+    ok = rabbit_mirror_queue_misc:update_mirrors(Q),
+    State.
+
+%% Forward publisher confirms for MsgSeqNos to the publishing channel
+%% process Pid.
+confirm_to_sender(Pid, QName, MsgSeqNos) ->
+    rabbit_classic_queue:confirm_to_sender(Pid, QName, MsgSeqNos).
+
+
diff --git a/deps/rabbit/src/rabbit_amqqueue_sup.erl b/deps/rabbit/src/rabbit_amqqueue_sup.erl
new file mode 100644
index 0000000000..a9eaf4087f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_amqqueue_sup.erl
@@ -0,0 +1,35 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_amqqueue_sup).
+
+-behaviour(supervisor2).
+
+-export([start_link/2]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(amqqueue:amqqueue(), rabbit_prequeue:start_mode()) ->
+          {'ok', pid(), pid()}.
+
+%% Start a one-off supervisor whose single child is the queue
+%% (rabbit_prequeue) process. The short-lived "marker" process is linked
+%% while the child starts and is handed to rabbit_prequeue:start_link/3;
+%% presumably it lets the child detect whether this starter is still
+%% alive during startup -- TODO confirm against rabbit_prequeue.
+%% Note the order matters: the marker is only unlinked and stopped after
+%% the child has been started.
+start_link(Q, StartMode) ->
+    Marker = spawn_link(fun() -> receive stop -> ok end end),
+    ChildSpec = {rabbit_amqqueue,
+                 {rabbit_prequeue, start_link, [Q, StartMode, Marker]},
+                 intrinsic, ?WORKER_WAIT, worker, [rabbit_amqqueue_process,
+                                                   rabbit_mirror_queue_slave]},
+    {ok, SupPid} = supervisor2:start_link(?MODULE, []),
+    {ok, QPid} = supervisor2:start_child(SupPid, ChildSpec),
+    unlink(Marker),
+    Marker ! stop,
+    {ok, SupPid, QPid}.
+
+init([]) -> {ok, {{one_for_one, 5, 10}, []}}.
diff --git a/deps/rabbit/src/rabbit_amqqueue_sup_sup.erl b/deps/rabbit/src/rabbit_amqqueue_sup_sup.erl
new file mode 100644
index 0000000000..732816b79f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_amqqueue_sup_sup.erl
@@ -0,0 +1,84 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_amqqueue_sup_sup).
+
+-behaviour(supervisor2).
+
+-export([start_link/0, start_queue_process/3]).
+-export([start_for_vhost/1, stop_for_vhost/1,
+ find_for_vhost/2, find_for_vhost/1]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+-define(SERVER, ?MODULE).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+%% Start the per-vhost queue-supervisor supervisor (unregistered; it is
+%% looked up via its parent vhost supervisor, see find_for_vhost/2).
+start_link() ->
+    supervisor2:start_link(?MODULE, []).
+
+-spec start_queue_process
+        (node(), amqqueue:amqqueue(), 'declare' | 'recovery' | 'slave') ->
+            pid().
+
+%% Start a queue process for Q under the queue supervisor of the queue's
+%% vhost on Node, and return the queue process pid (the intermediate
+%% rabbit_amqqueue_sup pid is discarded). Crashes if the vhost supervisor
+%% cannot be found.
+start_queue_process(Node, Q, StartMode) ->
+    #resource{virtual_host = VHost} = amqqueue:get_name(Q),
+    {ok, Sup} = find_for_vhost(VHost, Node),
+    {ok, _SupPid, QPid} = supervisor2:start_child(Sup, [Q, StartMode]),
+    QPid.
+
+%% Supervisor callback: a simple_one_for_one of rabbit_amqqueue_sup
+%% children, one per queue, started on demand via start_queue_process/3.
+init([]) ->
+    {ok, {{simple_one_for_one, 10, 10},
+          [{rabbit_amqqueue_sup, {rabbit_amqqueue_sup, start_link, []},
+            temporary, ?SUPERVISOR_WAIT, supervisor, [rabbit_amqqueue_sup]}]}}.
+
+-spec find_for_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}.
+%% Find this vhost's queue supervisor on the local node.
+find_for_vhost(VHost) ->
+    find_for_vhost(VHost, node()).
+
+-spec find_for_vhost(rabbit_types:vhost(), atom()) -> {ok, pid()} | {error, term()}.
+%% Find the rabbit_amqqueue_sup_sup child of the vhost supervisor for
+%% VHost on Node. Crashes (badmatch) if the vhost supervisor itself is
+%% missing; returns {error, {queue_supervisor_not_found, _}} if the vhost
+%% supervisor exists but has no (or multiple) queue supervisor children.
+find_for_vhost(VHost, Node) ->
+    {ok, VHostSup} = rabbit_vhost_sup_sup:get_vhost_sup(VHost, Node),
+    case supervisor2:find_child(VHostSup, rabbit_amqqueue_sup_sup) of
+        [QSup] -> {ok, QSup};
+        Result -> {error, {queue_supervisor_not_found, Result}}
+    end.
+
+-spec start_for_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}.
+%% Start the queue supervisor as a (transient) child of the vhost
+%% supervisor. Tolerates the vhost having been deleted concurrently by
+%% logging and returning {error, {no_such_vhost, VHost}}.
+start_for_vhost(VHost) ->
+    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+        {ok, VHostSup} ->
+            supervisor2:start_child(
+              VHostSup,
+              {rabbit_amqqueue_sup_sup,
+               {rabbit_amqqueue_sup_sup, start_link, []},
+               transient, infinity, supervisor, [rabbit_amqqueue_sup_sup]});
+        %% we can get here if a vhost is added and removed concurrently
+        %% e.g. some integration tests do it
+        {error, {no_such_vhost, VHost}} ->
+            rabbit_log:error("Failed to start a queue process supervisor for vhost ~s: vhost no longer exists!",
+                             [VHost]),
+            {error, {no_such_vhost, VHost}}
+    end.
+
+-spec stop_for_vhost(rabbit_types:vhost()) -> ok.
+%% Terminate and remove the queue supervisor child of the vhost
+%% supervisor. As with start_for_vhost/1, a concurrently-deleted vhost is
+%% logged and treated as success.
+stop_for_vhost(VHost) ->
+    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+        {ok, VHostSup} ->
+            ok = supervisor2:terminate_child(VHostSup, rabbit_amqqueue_sup_sup),
+            ok = supervisor2:delete_child(VHostSup, rabbit_amqqueue_sup_sup);
+        %% see start/1
+        {error, {no_such_vhost, VHost}} ->
+            rabbit_log:error("Failed to stop a queue process supervisor for vhost ~s: vhost no longer exists!",
+                             [VHost]),
+            ok
+    end.
diff --git a/deps/rabbit/src/rabbit_auth_backend_internal.erl b/deps/rabbit/src/rabbit_auth_backend_internal.erl
new file mode 100644
index 0000000000..cb930a1630
--- /dev/null
+++ b/deps/rabbit/src/rabbit_auth_backend_internal.erl
@@ -0,0 +1,1076 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_auth_backend_internal).
+-include("rabbit.hrl").
+
+-behaviour(rabbit_authn_backend).
+-behaviour(rabbit_authz_backend).
+
+-export([user_login_authentication/2, user_login_authorization/2,
+ check_vhost_access/3, check_resource_access/4, check_topic_access/4]).
+
+-export([add_user/3, delete_user/2, lookup_user/1, exists/1,
+ change_password/3, clear_password/2,
+ hash_password/2, change_password_hash/2, change_password_hash/3,
+ set_tags/3, set_permissions/6, clear_permissions/3,
+ set_topic_permissions/6, clear_topic_permissions/3, clear_topic_permissions/4,
+ add_user_sans_validation/3, put_user/2, put_user/3]).
+
+-export([set_user_limits/3, clear_user_limits/3, is_over_connection_limit/1,
+ is_over_channel_limit/1, get_user_limits/0, get_user_limits/1]).
+
+-export([user_info_keys/0, perms_info_keys/0,
+ user_perms_info_keys/0, vhost_perms_info_keys/0,
+ user_vhost_perms_info_keys/0, all_users/0,
+ list_users/0, list_users/2, list_permissions/0,
+ list_user_permissions/1, list_user_permissions/3,
+ list_topic_permissions/0,
+ list_vhost_permissions/1, list_vhost_permissions/3,
+ list_user_vhost_permissions/2,
+ list_user_topic_permissions/1, list_vhost_topic_permissions/1, list_user_vhost_topic_permissions/2]).
+
+-export([state_can_expire/0]).
+
+%% for testing
+-export([hashing_module_for_user/1, expand_topic_permission/2]).
+
+%%----------------------------------------------------------------------------
+
+-type regexp() :: binary().
+
+%%----------------------------------------------------------------------------
+%% Implementation of rabbit_auth_backend
+
+%% Returns a password hashing module for the user record provided. If
+%% there is no information in the record, we consider it to be legacy
+%% (inserted by a version older than 3.6.0) and fall back to MD5, the
+%% now obsolete hashing function.
+%% Return the password hashing module recorded for this user.
+%% rabbit_password:hashing_mod/1 maps 'undefined' (legacy, pre-3.6.0
+%% records) to the obsolete MD5 fallback.
+hashing_module_for_user(User) ->
+    rabbit_password:hashing_mod(
+      internal_user:get_hashing_algorithm(User)).
+
+-define(BLANK_PASSWORD_REJECTION_MESSAGE,
+ "user '~s' attempted to log in with a blank password, which is prohibited by the internal authN backend. "
+ "To use TLS/x509 certificate-based authentication, see the rabbitmq_auth_mechanism_ssl plugin and configure the client to use the EXTERNAL authentication mechanism. "
+ "Alternatively change the password for the user to be non-blank.").
+
+%% For cases when we do not have a set of credentials,
+%% namely when x509 (TLS) certificates are used. This should only be
+%% possible when the EXTERNAL authentication mechanism is used, see
+%% rabbit_auth_mechanism_plain:handle_response/2 and rabbit_reader:auth_phase/2.
+%% Authenticate a user. With no credentials (x509/EXTERNAL mechanism) the
+%% user only needs to exist; with a password, its salted hash must match
+%% the stored hash. Blank passwords are rejected outright.
+user_login_authentication(Username, []) ->
+    internal_check_user_login(Username, fun(_) -> true end);
+%% For cases when we do have a set of credentials. rabbit_auth_mechanism_plain:handle_response/2
+%% performs initial validation.
+user_login_authentication(Username, AuthProps) ->
+    case lists:keyfind(password, 1, AuthProps) of
+        {password, <<"">>} ->
+            {refused, ?BLANK_PASSWORD_REJECTION_MESSAGE,
+             [Username]};
+        {password, ""} ->
+            {refused, ?BLANK_PASSWORD_REJECTION_MESSAGE,
+             [Username]};
+        {password, Cleartext} ->
+            internal_check_user_login(
+              Username,
+              fun(User) ->
+                  %% stored hash layout: 4-byte salt followed by the
+                  %% salted hash; anything else never matches
+                  case internal_user:get_password_hash(User) of
+                      <<Salt:4/binary, Hash/binary>> ->
+                          Hash =:= rabbit_password:salted_hash(
+                              hashing_module_for_user(User), Salt, Cleartext);
+                      _ ->
+                          false
+                  end
+              end);
+        false -> exit({unknown_auth_props, Username, AuthProps})
+    end.
+
+state_can_expire() -> false.
+
+%% Authorize a user for whom authentication has already happened
+%% elsewhere: re-uses the credential-less authentication path and unpacks
+%% the impl/tags from the resulting #auth_user{}.
+user_login_authorization(Username, _AuthProps) ->
+    case user_login_authentication(Username, []) of
+        {ok, #auth_user{impl = Impl, tags = Tags}} -> {ok, Impl, Tags};
+        Else                                       -> Else
+    end.
+
+%% Look the user up and apply the credential-checking Fun to the record.
+%% A missing user and a failed check yield the same refusal message, so
+%% callers cannot distinguish "no such user" from "bad password".
+internal_check_user_login(Username, Fun) ->
+    Refused = {refused, "user '~s' - invalid credentials", [Username]},
+    case lookup_user(Username) of
+        {ok, User} ->
+            Tags = internal_user:get_tags(User),
+            case Fun(User) of
+                true -> {ok, #auth_user{username = Username,
+                                        tags = Tags,
+                                        impl = none}};
+                _    -> Refused
+            end;
+        {error, not_found} ->
+            Refused
+    end.
+
+%% A user may access a vhost iff a permission record exists for the
+%% {user, vhost} pair, regardless of the permission's contents.
+check_vhost_access(#auth_user{username = Username}, VHostPath, _AuthzData) ->
+    Key = #user_vhost{username     = Username,
+                      virtual_host = VHostPath},
+    case mnesia:dirty_read({rabbit_user_permission, Key}) of
+        []   -> false;
+        [_R] -> true
+    end.
+
+%% Check configure/write/read access to a resource by matching its name
+%% against the user's per-vhost permission regexp. No permission record
+%% at all means access denied; an empty regexp matches nothing.
+check_resource_access(#auth_user{username = Username},
+                      #resource{virtual_host = VHostPath, name = Name},
+                      Permission,
+                      _AuthContext) ->
+    case mnesia:dirty_read({rabbit_user_permission,
+                            #user_vhost{username     = Username,
+                                        virtual_host = VHostPath}}) of
+        [] ->
+            false;
+        [#user_permission{permission = P}] ->
+            PermRegexp = case element(permission_index(Permission), P) of
+                             %% <<"^$">> breaks Emacs' erlang mode
+                             <<"">> -> <<$^, $$>>;
+                             RE     -> RE
+                         end,
+            case re:run(Name, PermRegexp, [{capture, none}]) of
+                match    -> true;
+                nomatch  -> false
+            end
+    end.
+
+%% Check topic (routing-key level) access on a topic exchange. NOTE the
+%% default differs from check_resource_access/4: with no topic permission
+%% record, access is GRANTED. The permission regexp may contain
+%% {variable} placeholders expanded from the context's variable_map.
+check_topic_access(#auth_user{username = Username},
+                   #resource{virtual_host = VHostPath, name = Name, kind = topic},
+                   Permission,
+                   Context) ->
+    case mnesia:dirty_read({rabbit_topic_permission,
+                            #topic_permission_key{user_vhost = #user_vhost{username     = Username,
+                                                                           virtual_host = VHostPath},
+                                                  exchange = Name
+                                                 }}) of
+        [] ->
+            true;
+        [#topic_permission{permission = P}] ->
+            PermRegexp = case element(permission_index(Permission), P) of
+                             %% <<"^$">> breaks Emacs' erlang mode
+                             <<"">> -> <<$^, $$>>;
+                             RE     -> RE
+                         end,
+            PermRegexpExpanded = expand_topic_permission(
+                                   PermRegexp,
+                                   maps:get(variable_map, Context, undefined)
+                                  ),
+            case re:run(maps:get(routing_key, Context), PermRegexpExpanded, [{capture, none}]) of
+                match   -> true;
+                nomatch -> false
+            end
+    end.
+
+%% Substitute each {key} placeholder in a topic permission pattern with
+%% its value from the variable map (all occurrences). With anything other
+%% than a map (typically 'undefined') the pattern is returned untouched.
+expand_topic_permission(Permission, Vars) when is_map(Vars) ->
+    maps:fold(
+      fun(Name, Value, Acc) ->
+              Placeholder = <<"{", Name/binary, "}">>,
+              binary:replace(Acc, Placeholder, Value, [global])
+      end, Permission, Vars);
+expand_topic_permission(Permission, _Vars) ->
+    Permission.
+
+%% Map a permission kind to its field position in the #permission record,
+%% for use with element/2 on a stored permission tuple.
+permission_index(configure) -> #permission.configure;
+permission_index(write)     -> #permission.write;
+permission_index(read)      -> #permission.read.
+
+%%----------------------------------------------------------------------------
+%% Manipulation of the user database
+
+%% Run the configured credential validator against a username/password
+%% pair; returns ok or {error, Reason}.
+validate_credentials(Username, Password) ->
+    rabbit_credential_validation:validate(Username, Password).
+
+%% Validate the credentials first and, only on success, hand them to Fun
+%% (e.g. add_user_sans_validation/3). Validation failures are logged and
+%% returned as {error, Reason}.
+validate_and_alternate_credentials(Username, Password, ActingUser, Fun) ->
+    case validate_credentials(Username, Password) of
+        ok ->
+            Fun(Username, Password, ActingUser);
+        {error, Err} ->
+            rabbit_log:error("Credential validation for '~s' failed!~n", [Username]),
+            {error, Err}
+    end.
+
+-spec add_user(rabbit_types:username(), rabbit_types:password(),
+               rabbit_types:username()) -> 'ok' | {'error', string()}.
+
+%% Create a user after validating the password against the configured
+%% credential validator.
+add_user(Username, Password, ActingUser) ->
+    validate_and_alternate_credentials(Username, Password, ActingUser,
+                                       fun add_user_sans_validation/3).
+
+%% Create a new internal user, bypassing credential validation. Aborts
+%% the mnesia transaction (and re-throws/exits after logging) if the
+%% username already exists; emits a user_created event on success.
+add_user_sans_validation(Username, Password, ActingUser) ->
+    %% byte_size/1, not bit_size/1: the message promises a length in bytes
+    rabbit_log:debug("Asked to create a new user '~s', password length in bytes: ~p", [Username, byte_size(Password)]),
+    %% hash_password will pick the hashing function configured for us
+    %% but we also need to store a hint as part of the record, so we
+    %% retrieve it here one more time
+    HashingMod = rabbit_password:hashing_mod(),
+    PasswordHash = hash_password(HashingMod, Password),
+    User = internal_user:create_user(Username, PasswordHash, HashingMod),
+    try
+        R = rabbit_misc:execute_mnesia_transaction(
+              fun () ->
+                      case mnesia:wread({rabbit_user, Username}) of
+                          [] ->
+                              ok = mnesia:write(rabbit_user, User, write);
+                          _ ->
+                              mnesia:abort({user_already_exists, Username})
+                      end
+              end),
+        rabbit_log:info("Created user '~s'", [Username]),
+        rabbit_event:notify(user_created, [{name, Username},
+                                           {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {user_already_exists, _}} = Error ->
+            rabbit_log:warning("Failed to add user '~s': the user already exists", [Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to add user '~s': ~p", [Username, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to add user '~s': ~p", [Username, Error]),
+            exit(Error)
+    end.
+
+-spec delete_user(rabbit_types:username(), rabbit_types:username()) -> 'ok'.
+
+%% Delete a user together with all of their per-vhost permissions and
+%% topic permissions, in one mnesia transaction. Emits a user_deleted
+%% event; re-throws/exits after logging on failure.
+delete_user(Username, ActingUser) ->
+    rabbit_log:debug("Asked to delete user '~s'", [Username]),
+    try
+        R = rabbit_misc:execute_mnesia_transaction(
+              rabbit_misc:with_user(
+                Username,
+                fun () ->
+                        ok = mnesia:delete({rabbit_user, Username}),
+                        [ok = mnesia:delete_object(
+                                rabbit_user_permission, R, write) ||
+                            R <- mnesia:match_object(
+                                   rabbit_user_permission,
+                                   #user_permission{user_vhost = #user_vhost{
+                                                      username = Username,
+                                                      virtual_host = '_'},
+                                                    permission = '_'},
+                                   write)],
+                        UserTopicPermissionsQuery = match_user_vhost_topic_permission(Username, '_'),
+                        UserTopicPermissions = UserTopicPermissionsQuery(),
+                        [ok = mnesia:delete_object(rabbit_topic_permission, R, write) || R <- UserTopicPermissions],
+                        ok
+                end)),
+        rabbit_log:info("Deleted user '~s'", [Username]),
+        rabbit_event:notify(user_deleted,
+                            [{name, Username},
+                             {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {no_such_user, _}} = Error ->
+            rabbit_log:warning("Failed to delete user '~s': the user does not exist", [Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to delete user '~s': ~p", [Username, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to delete user '~s': ~p", [Username, Error]),
+            exit(Error)
+    end .
+
+-spec lookup_user
+        (rabbit_types:username()) ->
+            rabbit_types:ok(internal_user:internal_user()) |
+            rabbit_types:error('not_found').
+
+%% Dirty-read the user record; {ok, User} or {error, not_found}.
+lookup_user(Username) ->
+    rabbit_misc:dirty_read({rabbit_user, Username}).
+
+-spec exists(rabbit_types:username()) -> boolean().
+
+%% True iff the user record exists (lookup_user/1 only ever returns
+%% {ok, User} or {error, not_found}).
+exists(Username) ->
+    lookup_user(Username) =/= {error, not_found}.
+
+-spec change_password
+        (rabbit_types:username(), rabbit_types:password(), rabbit_types:username()) -> 'ok'.
+
+%% Change a user's password after validating the new password.
+change_password(Username, Password, ActingUser) ->
+    validate_and_alternate_credentials(Username, Password, ActingUser,
+                                       fun change_password_sans_validation/3).
+
+%% Change a user's password without credential validation: hash it with
+%% the currently configured algorithm and store hash + algorithm hint.
+%% Emits user_password_changed; re-throws/exits after logging on failure.
+change_password_sans_validation(Username, Password, ActingUser) ->
+    try
+        %% byte_size/1, not bit_size/1: the message promises a length in bytes
+        rabbit_log:debug("Asked to change password of user '~s', new password length in bytes: ~p", [Username, byte_size(Password)]),
+        HashingAlgorithm = rabbit_password:hashing_mod(),
+        %% re-use the module we just looked up rather than fetching it again
+        R = change_password_hash(Username,
+                                 hash_password(HashingAlgorithm, Password),
+                                 HashingAlgorithm),
+        rabbit_log:info("Successfully changed password for user '~s'", [Username]),
+        rabbit_event:notify(user_password_changed,
+                            [{name, Username},
+                             {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {no_such_user, _}} = Error ->
+            rabbit_log:warning("Failed to change password for user '~s': the user does not exist", [Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to change password for user '~s': ~p", [Username, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to change password for user '~s': ~p", [Username, Error]),
+            exit(Error)
+    end.
+
+-spec clear_password(rabbit_types:username(), rabbit_types:username()) -> 'ok'.
+
+%% Replace the user's password hash with the empty binary, which can
+%% never match any login attempt (blank passwords are rejected before
+%% hashing). Emits a user_password_cleared event.
+clear_password(Username, ActingUser) ->
+    rabbit_log:info("Clearing password for '~s'~n", [Username]),
+    R = change_password_hash(Username, <<"">>),
+    rabbit_event:notify(user_password_cleared,
+                        [{name, Username},
+                         {user_who_performed_action, ActingUser}]),
+    R.
+
+-spec hash_password
+        (module(), rabbit_types:password()) -> rabbit_types:password_hash().
+
+%% Hash a cleartext password with the given hashing module.
+hash_password(HashingMod, Cleartext) ->
+    rabbit_password:hash(HashingMod, Cleartext).
+
+-spec change_password_hash
+        (rabbit_types:username(), rabbit_types:password_hash()) -> 'ok'.
+
+%% Store a precomputed password hash, recording the currently configured
+%% hashing algorithm as the hint.
+change_password_hash(Username, PasswordHash) ->
+    change_password_hash(Username, PasswordHash, rabbit_password:hashing_mod()).
+
+
+%% Store a precomputed password hash together with an explicit hashing
+%% algorithm hint, inside a with-user mnesia transaction.
+change_password_hash(Username, PasswordHash, HashingAlgorithm) ->
+    update_user(Username, fun(User) ->
+                              internal_user:set_password_hash(User,
+                                  PasswordHash, HashingAlgorithm)
+                          end).
+
+-spec set_tags(rabbit_types:username(), [atom()], rabbit_types:username()) -> 'ok'.
+
+%% Replace the user's tag list. Tags are coerced to atoms
+%% (rabbit_data_coercion:to_atom/1 -- NOTE(review): presumably creates
+%% atoms from caller-supplied input; confirm callers sanitise tags).
+%% Emits a user_tags_set event; re-throws/exits after logging on failure.
+set_tags(Username, Tags, ActingUser) ->
+    ConvertedTags = [rabbit_data_coercion:to_atom(I) || I <- Tags],
+    rabbit_log:debug("Asked to set user tags for user '~s' to ~p", [Username, ConvertedTags]),
+    try
+        R = update_user(Username, fun(User) ->
+                                      internal_user:set_tags(User, ConvertedTags)
+                                  end),
+        rabbit_log:info("Successfully set user tags for user '~s' to ~p", [Username, ConvertedTags]),
+        rabbit_event:notify(user_tags_set, [{name, Username}, {tags, ConvertedTags},
+                                            {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {no_such_user, _}} = Error ->
+            rabbit_log:warning("Failed to set tags for user '~s': the user does not exist", [Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to set tags for user '~s': ~p", [Username, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to set tags for user '~s': ~p", [Username, Error]),
+            exit(Error)
+    end .
+
+-spec set_permissions
+        (rabbit_types:username(), rabbit_types:vhost(), regexp(), regexp(),
+         regexp(), rabbit_types:username()) ->
+            'ok'.
+
+%% Set the configure/write/read permission regexps for a user in a vhost.
+%% All three regexps are compile-checked first (throwing
+%% {error, {invalid_regexp, _, _}} on failure); the write is performed in
+%% a with-user-and-vhost transaction and a permission_created event is
+%% emitted. Re-throws/exits after logging on failure.
+set_permissions(Username, VirtualHost, ConfigurePerm, WritePerm, ReadPerm, ActingUser) ->
+    rabbit_log:debug("Asked to set permissions for "
+                     "'~s' in virtual host '~s' to '~s', '~s', '~s'",
+                     [Username, VirtualHost, ConfigurePerm, WritePerm, ReadPerm]),
+    %% lists:foreach/2, not lists:map/2: this loop is run purely for its
+    %% side effect (throwing on an invalid regexp); the result is unused.
+    lists:foreach(
+      fun (RegexpBin) ->
+              Regexp = binary_to_list(RegexpBin),
+              case re:compile(Regexp) of
+                  {ok, _} -> ok;
+                  {error, Reason} ->
+                      rabbit_log:warning("Failed to set permissions for '~s' in virtual host '~s': "
+                                         "regular expression '~s' is invalid",
+                                         [Username, VirtualHost, RegexpBin]),
+                      throw({error, {invalid_regexp, Regexp, Reason}})
+              end
+      end, [ConfigurePerm, WritePerm, ReadPerm]),
+    try
+        R = rabbit_misc:execute_mnesia_transaction(
+              rabbit_vhost:with_user_and_vhost(
+                Username, VirtualHost,
+                fun () -> ok = mnesia:write(
+                                 rabbit_user_permission,
+                                 #user_permission{user_vhost = #user_vhost{
+                                                    username = Username,
+                                                    virtual_host = VirtualHost},
+                                                  permission = #permission{
+                                                    configure = ConfigurePerm,
+                                                    write = WritePerm,
+                                                    read = ReadPerm}},
+                                 write)
+                end)),
+        rabbit_log:info("Successfully set permissions for "
+                        "'~s' in virtual host '~s' to '~s', '~s', '~s'",
+                        [Username, VirtualHost, ConfigurePerm, WritePerm, ReadPerm]),
+        rabbit_event:notify(permission_created, [{user,      Username},
+                                                 {vhost,     VirtualHost},
+                                                 {configure, ConfigurePerm},
+                                                 {write,     WritePerm},
+                                                 {read,      ReadPerm},
+                                                 {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {no_such_vhost, _}} = Error ->
+            rabbit_log:warning("Failed to set permissions for '~s': virtual host '~s' does not exist",
+                               [Username, VirtualHost]),
+            throw(Error);
+        throw:{error, {no_such_user, _}} = Error ->
+            rabbit_log:warning("Failed to set permissions for '~s': the user does not exist",
+                               [Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to set permissions for '~s' in virtual host '~s': ~p",
+                               [Username, VirtualHost, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to set permissions for '~s' in virtual host '~s': ~p",
+                               [Username, VirtualHost, Error]),
+            exit(Error)
+    end.
+
+-spec clear_permissions
+        (rabbit_types:username(), rabbit_types:vhost(), rabbit_types:username()) -> 'ok'.
+
+%% Remove a user's permission record for a vhost in a with-user-and-vhost
+%% transaction; emits permission_deleted. Re-throws/exits after logging.
+clear_permissions(Username, VirtualHost, ActingUser) ->
+    rabbit_log:debug("Asked to clear permissions for '~s' in virtual host '~s'",
+                     [Username, VirtualHost]),
+    try
+        R = rabbit_misc:execute_mnesia_transaction(
+              rabbit_vhost:with_user_and_vhost(
+                Username, VirtualHost,
+                fun () ->
+                        ok = mnesia:delete({rabbit_user_permission,
+                                            #user_vhost{username = Username,
+                                                        virtual_host = VirtualHost}})
+                end)),
+        rabbit_log:info("Successfully cleared permissions for '~s' in virtual host '~s'",
+                        [Username, VirtualHost]),
+        rabbit_event:notify(permission_deleted, [{user,  Username},
+                                                 {vhost, VirtualHost},
+                                                 {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {no_such_vhost, _}} = Error ->
+            rabbit_log:warning("Failed to clear permissions for '~s': virtual host '~s' does not exist",
+                               [Username, VirtualHost]),
+            throw(Error);
+        throw:{error, {no_such_user, _}} = Error ->
+            rabbit_log:warning("Failed to clear permissions for '~s': the user does not exist",
+                               [Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to clear permissions for '~s' in virtual host '~s': ~p",
+                               [Username, VirtualHost, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to clear permissions for '~s' in virtual host '~s': ~p",
+                               [Username, VirtualHost, Error]),
+            exit(Error)
+    end.
+
+
+%% Apply Fun to the user's current record and write the result back,
+%% inside a with-user mnesia transaction (throws if the user is missing).
+update_user(Username, Fun) ->
+    rabbit_misc:execute_mnesia_transaction(
+      rabbit_misc:with_user(
+        Username,
+        fun () ->
+                {ok, User} = lookup_user(Username),
+                ok = mnesia:write(rabbit_user, Fun(User), write)
+        end)).
+
+%% Set write/read topic permission regexps for a user on one exchange in
+%% a vhost. Inputs are compile-checked (throwing {error,
+%% {invalid_regexp, _, _}}) and stored as binaries; emits a
+%% topic_permission_created event. Re-throws/exits after logging.
+set_topic_permissions(Username, VirtualHost, Exchange, WritePerm, ReadPerm, ActingUser) ->
+    rabbit_log:debug("Asked to set topic permissions on exchange '~s' for "
+                     "user '~s' in virtual host '~s' to '~s', '~s'",
+                     [Exchange, Username, VirtualHost, WritePerm, ReadPerm]),
+    WritePermRegex = rabbit_data_coercion:to_binary(WritePerm),
+    ReadPermRegex = rabbit_data_coercion:to_binary(ReadPerm),
+    %% validation loop; only the throw side effect matters here
+    lists:map(
+      fun (RegexpBin) ->
+              case re:compile(RegexpBin) of
+                  {ok, _} -> ok;
+                  {error, Reason} ->
+                      rabbit_log:warning("Failed to set topic permissions on exchange '~s' for "
+                                         "'~s' in virtual host '~s': regular expression '~s' is invalid",
+                                         [Exchange, Username, VirtualHost, RegexpBin]),
+                      throw({error, {invalid_regexp, RegexpBin, Reason}})
+              end
+      end, [WritePerm, ReadPerm]),
+    try
+        R = rabbit_misc:execute_mnesia_transaction(
+              rabbit_vhost:with_user_and_vhost(
+                Username, VirtualHost,
+                fun () -> ok = mnesia:write(
+                                 rabbit_topic_permission,
+                                 #topic_permission{
+                                   topic_permission_key = #topic_permission_key{
+                                     user_vhost = #user_vhost{
+                                       username = Username,
+                                       virtual_host = VirtualHost},
+                                     exchange = Exchange
+                                   },
+                                   permission = #permission{
+                                     write = WritePermRegex,
+                                     read = ReadPermRegex
+                                   }
+                                 },
+                                 write)
+                end)),
+        rabbit_log:info("Successfully set topic permissions on exchange '~s' for "
+                        "'~s' in virtual host '~s' to '~s', '~s'",
+                        [Exchange, Username, VirtualHost, WritePerm, ReadPerm]),
+        rabbit_event:notify(topic_permission_created, [
+            {user,     Username},
+            {vhost,    VirtualHost},
+            {exchange, Exchange},
+            {write,    WritePermRegex},
+            {read,     ReadPermRegex},
+            {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {no_such_vhost, _}} = Error ->
+            rabbit_log:warning("Failed to set topic permissions on exchange '~s' for '~s': virtual host '~s' does not exist.",
+                               [Exchange, Username, VirtualHost]),
+            throw(Error);
+        throw:{error, {no_such_user, _}} = Error ->
+            rabbit_log:warning("Failed to set topic permissions on exchange '~s' for '~s': the user does not exist.",
+                               [Exchange, Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to set topic permissions on exchange '~s' for '~s' in virtual host '~s': ~p.",
+                               [Exchange, Username, VirtualHost, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to set topic permissions on exchange '~s' for '~s' in virtual host '~s': ~p.",
+                               [Exchange, Username, VirtualHost, Error]),
+            exit(Error)
+    end .
+
+%% Remove ALL topic permissions of a user in a vhost (across every
+%% exchange); emits topic_permission_deleted. Re-throws/exits after
+%% logging on failure.
+clear_topic_permissions(Username, VirtualHost, ActingUser) ->
+    rabbit_log:debug("Asked to clear topic permissions for '~s' in virtual host '~s'",
+                     [Username, VirtualHost]),
+    try
+        R = rabbit_misc:execute_mnesia_transaction(
+              rabbit_vhost:with_user_and_vhost(
+                Username, VirtualHost,
+                fun () ->
+                        ListFunction = match_user_vhost_topic_permission(Username, VirtualHost),
+                        List = ListFunction(),
+                        lists:foreach(fun(X) ->
+                                          ok = mnesia:delete_object(rabbit_topic_permission, X, write)
+                                      end, List)
+                end)),
+        rabbit_log:info("Successfully cleared topic permissions for '~s' in virtual host '~s'",
+                        [Username, VirtualHost]),
+        rabbit_event:notify(topic_permission_deleted, [{user,  Username},
+                                                       {vhost, VirtualHost},
+                                                       {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {no_such_vhost, _}} = Error ->
+            rabbit_log:warning("Failed to clear topic permissions for '~s': virtual host '~s' does not exist",
+                               [Username, VirtualHost]),
+            throw(Error);
+        throw:{error, {no_such_user, _}} = Error ->
+            rabbit_log:warning("Failed to clear topic permissions for '~s': the user does not exist",
+                               [Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to clear topic permissions for '~s' in virtual host '~s': ~p",
+                               [Username, VirtualHost, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to clear topic permissions for '~s' in virtual host '~s': ~p",
+                               [Username, VirtualHost, Error]),
+            exit(Error)
+    end.
+
+%% Remove the topic permission of a user on ONE exchange in a vhost.
+%% Emits topic_permission_deleted (previously permission_deleted, which
+%% was inconsistent with clear_topic_permissions/3 and with the
+%% topic_permission_created event). Re-throws/exits after logging.
+clear_topic_permissions(Username, VirtualHost, Exchange, ActingUser) ->
+    rabbit_log:debug("Asked to clear topic permissions on exchange '~s' for '~s' in virtual host '~s'",
+                     [Exchange, Username, VirtualHost]),
+    try
+        R = rabbit_misc:execute_mnesia_transaction(
+              rabbit_vhost:with_user_and_vhost(
+                Username, VirtualHost,
+                fun () ->
+                        ok = mnesia:delete(rabbit_topic_permission,
+                                           #topic_permission_key{
+                                             user_vhost = #user_vhost{
+                                               username = Username,
+                                               virtual_host = VirtualHost},
+                                             exchange = Exchange
+                                           }, write)
+                end)),
+        rabbit_log:info("Successfully cleared topic permissions on exchange '~s' for '~s' in virtual host '~s'",
+                        [Exchange, Username, VirtualHost]),
+        rabbit_event:notify(topic_permission_deleted, [{user,  Username},
+                                                       {vhost, VirtualHost},
+                                                       {user_who_performed_action, ActingUser}]),
+        R
+    catch
+        throw:{error, {no_such_vhost, _}} = Error ->
+            rabbit_log:warning("Failed to clear topic permissions on exchange '~s' for '~s': virtual host '~s' does not exist",
+                               [Exchange, Username, VirtualHost]),
+            throw(Error);
+        throw:{error, {no_such_user, _}} = Error ->
+            rabbit_log:warning("Failed to clear topic permissions on exchange '~s' for '~s': the user does not exist",
+                               [Exchange, Username]),
+            throw(Error);
+        throw:Error ->
+            rabbit_log:warning("Failed to clear topic permissions on exchange '~s' for '~s' in virtual host '~s': ~p",
+                               [Exchange, Username, VirtualHost, Error]),
+            throw(Error);
+        exit:Error ->
+            rabbit_log:warning("Failed to clear topic permissions on exchange '~s' for '~s' in virtual host '~s': ~p",
+                               [Exchange, Username, VirtualHost, Error]),
+            exit(Error)
+    end.
+
+put_user(User, ActingUser) -> put_user(User, undefined, ActingUser).
+
+%% Create or update a user from a definitions-style map (keys: name, and
+%% optionally password, password_hash, tags, administrator, permissions).
+%% Exactly one of password/password_hash may be supplied; supplying both
+%% throws. For existing users tags are updated and the password
+%% (or hash) replaced; with neither, the password is cleared. For new
+%% users, pre-configured per-vhost permissions are also applied.
+put_user(User, Version, ActingUser) ->
+    Username        = maps:get(name, User),
+    HasPassword     = maps:is_key(password, User),
+    HasPasswordHash = maps:is_key(password_hash, User),
+    Password        = maps:get(password, User, undefined),
+    PasswordHash    = maps:get(password_hash, User, undefined),
+
+    %% tags may come either from 'tags' (comma-separated) or from the
+    %% legacy boolean 'administrator' field; one of the two must be there.
+    %% NOTE(review): list_to_atom/1 on definition-supplied tag names
+    %% creates atoms from external input -- confirm definitions are
+    %% operator-trusted.
+    Tags = case {maps:get(tags, User, undefined), maps:get(administrator, User, undefined)} of
+               {undefined, undefined} ->
+                   throw({error, tags_not_present});
+               {undefined, AdminS} ->
+                   case rabbit_misc:parse_bool(AdminS) of
+                       true  -> [administrator];
+                       false -> []
+                   end;
+               {TagsS, _} ->
+                   [list_to_atom(string:strip(T)) ||
+                       T <- string:tokens(binary_to_list(TagsS), ",")]
+           end,
+
+    %% pre-configured, only applies to newly created users
+    Permissions = maps:get(permissions, User, undefined),
+
+    PassedCredentialValidation =
+        case {HasPassword, HasPasswordHash} of
+            {true, false} ->
+                rabbit_credential_validation:validate(Username, Password) =:= ok;
+            {false, true} -> true;
+            _ ->
+                rabbit_credential_validation:validate(Username, Password) =:= ok
+        end,
+
+    case exists(Username) of
+        true ->
+            case {HasPassword, HasPasswordHash} of
+                {true, false} ->
+                    update_user_password(PassedCredentialValidation, Username, Password, Tags, ActingUser);
+                {false, true} ->
+                    update_user_password_hash(Username, PasswordHash, Tags, User, Version, ActingUser);
+                {true, true} ->
+                    throw({error, both_password_and_password_hash_are_provided});
+                %% clear password, update tags if needed
+                _ ->
+                    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser),
+                    rabbit_auth_backend_internal:clear_password(Username, ActingUser)
+            end;
+        false ->
+            case {HasPassword, HasPasswordHash} of
+                {true, false} ->
+                    create_user_with_password(PassedCredentialValidation, Username, Password, Tags, Permissions, ActingUser);
+                {false, true} ->
+                    create_user_with_password_hash(Username, PasswordHash, Tags, User, Version, Permissions, ActingUser);
+                {true, true} ->
+                    throw({error, both_password_and_password_hash_are_provided});
+                {false, false} ->
+                    %% this user won't be able to sign in using
+                    %% a username/password pair but can be used for x509 certificate authentication,
+                    %% with authn backends such as HTTP or LDAP and so on.
+                    create_user_with_password(PassedCredentialValidation, Username, <<"">>, Tags, Permissions, ActingUser)
+            end
+    end.
+
+%% Update password and tags of an existing user; throws
+%% {error, credential_validation_failed} if validation did not pass.
+update_user_password(_PassedCredentialValidation = true, Username, Password, Tags, ActingUser) ->
+    rabbit_auth_backend_internal:change_password(Username, Password, ActingUser),
+    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser);
+update_user_password(_PassedCredentialValidation = false, _Username, _Password, _Tags, _ActingUser) ->
+    %% we don't log here because
+    %% rabbit_auth_backend_internal will do it
+    throw({error, credential_validation_failed}).
+
+%% Update an existing user's password from a base64-encoded hash, plus
+%% tags. The hashing algorithm is taken from the definitions map (with a
+%% version-dependent default, see hashing_algorithm/2).
+update_user_password_hash(Username, PasswordHash, Tags, User, Version, ActingUser) ->
+    %% when a hash is provided, credential validation
+    %% is not applied
+    HashingAlgorithm = hashing_algorithm(User, Version),
+
+    Hash = rabbit_misc:b64decode_or_throw(PasswordHash),
+    rabbit_auth_backend_internal:change_password_hash(
+      Username, Hash, HashingAlgorithm),
+    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser).
+
+%% Create a new user with a cleartext password and tags; when a
+%% pre-configured permissions map is present, apply it afterwards. Throws
+%% {error, credential_validation_failed} if validation did not pass.
+create_user_with_password(_PassedCredentialValidation = true, Username, Password, Tags, undefined, ActingUser) ->
+    rabbit_auth_backend_internal:add_user(Username, Password, ActingUser),
+    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser);
+create_user_with_password(_PassedCredentialValidation = true, Username, Password, Tags, PreconfiguredPermissions, ActingUser) ->
+    rabbit_auth_backend_internal:add_user(Username, Password, ActingUser),
+    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser),
+    preconfigure_permissions(Username, PreconfiguredPermissions, ActingUser);
+create_user_with_password(_PassedCredentialValidation = false, _Username, _Password, _Tags, _, _) ->
+    %% we don't log here because
+    %% rabbit_auth_backend_internal will do it
+    throw({error, credential_validation_failed}).
+
+%% Create a new user from a base64-encoded password hash: the user is
+%% first created with a random throw-away password (skipping validation),
+%% then the stored hash is overwritten with the supplied one.
+create_user_with_password_hash(Username, PasswordHash, Tags, User, Version, PreconfiguredPermissions, ActingUser) ->
+    %% when a hash is provided, credential validation
+    %% is not applied
+    HashingAlgorithm = hashing_algorithm(User, Version),
+    Hash = rabbit_misc:b64decode_or_throw(PasswordHash),
+
+    %% first we create a user with dummy credentials and no
+    %% validation applied, then we update password hash
+    TmpPassword = rabbit_guid:binary(rabbit_guid:gen_secure(), "tmp"),
+    rabbit_auth_backend_internal:add_user_sans_validation(Username, TmpPassword, ActingUser),
+
+    rabbit_auth_backend_internal:change_password_hash(
+      Username, Hash, HashingAlgorithm),
+    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser),
+    preconfigure_permissions(Username, PreconfiguredPermissions, ActingUser).
+
+%% Applies a preconfigured permission map of the shape
+%% #{VHost => #{<<"configure">> | <<"write">> | <<"read">> => Pattern}}.
+%% `undefined` means there is nothing to apply.
+preconfigure_permissions(_Username, undefined, _ActingUser) ->
+    ok;
+preconfigure_permissions(Username, Map, ActingUser) when is_map(Map) ->
+    %% maps:map/2 is used purely for its side effects here; the
+    %% resulting map is discarded.
+    maps:map(fun(VHost, M) ->
+                     rabbit_auth_backend_internal:set_permissions(Username, VHost,
+                                                                  maps:get(<<"configure">>, M),
+                                                                  maps:get(<<"write">>, M),
+                                                                  maps:get(<<"read">>, M),
+                                                                  ActingUser)
+             end,
+             Map),
+    ok.
+
+%% Sets per-user limits. `Definition` is either a JSON document (list or
+%% binary) or an already-decoded map. Requires the `user_limits` feature
+%% flag; returns {error_string, _} on a disabled flag or a decode error.
+set_user_limits(Username, Definition, ActingUser) when is_list(Definition); is_binary(Definition) ->
+    case rabbit_feature_flags:is_enabled(user_limits) of
+        true ->
+            case rabbit_json:try_decode(rabbit_data_coercion:to_binary(Definition)) of
+                {ok, Term} ->
+                    validate_parameters_and_update_limit(Username, Term, ActingUser);
+                {error, Reason} ->
+                    {error_string, rabbit_misc:format(
+                                     "JSON decoding error. Reason: ~ts", [Reason])}
+            end;
+        false -> {error_string, "cannot set any user limits: the user_limits feature flag is not enabled"}
+    end;
+set_user_limits(Username, Definition, ActingUser) when is_map(Definition) ->
+    case rabbit_feature_flags:is_enabled(user_limits) of
+        true -> validate_parameters_and_update_limit(Username, Definition, ActingUser);
+        false -> {error_string, "cannot set any user limits: the user_limits feature flag is not enabled"}
+    end.
+
+%% Validates a decoded limits term against user_limit_validation/0 and,
+%% on success, merges it into the user record and emits an event.
+validate_parameters_and_update_limit(Username, Term, ActingUser) ->
+    case flatten_errors(rabbit_parameter_validation:proplist(
+                          <<"user-limits">>, user_limit_validation(), Term)) of
+        ok ->
+            update_user(Username, fun(User) ->
+                                          internal_user:update_limits(add, User, Term)
+                                  end),
+            notify_limit_set(Username, ActingUser, Term);
+        {errors, [{Reason, Arguments}]} ->
+            {error_string, rabbit_misc:format(Reason, Arguments)}
+    end.
+
+%% The set of recognised per-user limits; both are optional integers.
+user_limit_validation() ->
+    [{<<"max-connections">>, fun rabbit_parameter_validation:integer/2, optional},
+     {<<"max-channels">>, fun rabbit_parameter_validation:integer/2, optional}].
+
+%% Clears either all limits (<<"all">>) or a single named limit, then
+%% emits a user_limits_cleared event.
+clear_user_limits(Username, <<"all">>, ActingUser) ->
+    update_user(Username, fun(User) ->
+                                  internal_user:clear_limits(User)
+                          end),
+    notify_limit_clear(Username, ActingUser);
+clear_user_limits(Username, LimitType, ActingUser) ->
+    update_user(Username, fun(User) ->
+                                  internal_user:update_limits(remove, User, LimitType)
+                          end),
+    notify_limit_clear(Username, ActingUser).
+
+%% Collapses a (possibly nested) validation result into `ok` or
+%% {errors, [{FormatString, Args}]}.
+flatten_errors(L) ->
+    case [{F, A} || I <- lists:flatten([L]), {error, F, A} <- [I]] of
+        [] -> ok;
+        E  -> {errors, E}
+    end.
+
+%%----------------------------------------------------------------------------
+%% Listing
+
+%% Keys shared by all permission-listing variants; the user/vhost keys
+%% are prepended selectively below depending on what the caller already
+%% knows.
+-define(PERMS_INFO_KEYS, [configure, write, read]).
+-define(USER_INFO_KEYS, [user, tags]).
+
+-spec user_info_keys() -> rabbit_types:info_keys().
+
+user_info_keys() -> ?USER_INFO_KEYS.
+
+-spec perms_info_keys() -> rabbit_types:info_keys().
+
+perms_info_keys() -> [user, vhost | ?PERMS_INFO_KEYS].
+
+-spec vhost_perms_info_keys() -> rabbit_types:info_keys().
+
+vhost_perms_info_keys() -> [user | ?PERMS_INFO_KEYS].
+
+-spec user_perms_info_keys() -> rabbit_types:info_keys().
+
+user_perms_info_keys() -> [vhost | ?PERMS_INFO_KEYS].
+
+-spec user_vhost_perms_info_keys() -> rabbit_types:info_keys().
+
+user_vhost_perms_info_keys() -> ?PERMS_INFO_KEYS.
+
+%% Topic permissions add the exchange but have no `configure` component.
+topic_perms_info_keys() -> [user, vhost, exchange, write, read].
+user_topic_perms_info_keys() -> [vhost, exchange, write, read].
+vhost_topic_perms_info_keys() -> [user, exchange, write, read].
+user_vhost_topic_perms_info_keys() -> [exchange, write, read].
+
+%% Dirty read of every internal user record in Mnesia.
+all_users() -> mnesia:dirty_match_object(rabbit_user, internal_user:pattern_match_all()).
+
+-spec list_users() -> [rabbit_types:infos()].
+
+list_users() ->
+    [extract_internal_user_params(U) ||
+        U <- all_users()].
+
+-spec list_users(reference(), pid()) -> 'ok'.
+
+%% Streaming variant used by CLI tooling: emits one infos() per user to
+%% the aggregator process.
+list_users(Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map(
+      AggregatorPid, Ref,
+      fun(U) -> extract_internal_user_params(U) end,
+      all_users()).
+
+-spec list_permissions() -> [rabbit_types:infos()].
+
+list_permissions() ->
+    list_permissions(perms_info_keys(), match_user_vhost('_', '_')).
+
+%% QueryThunk is a fun run inside an Mnesia transaction that returns
+%% #user_permission{} records.
+list_permissions(Keys, QueryThunk) ->
+    [extract_user_permission_params(Keys, U) ||
+        U <- rabbit_misc:execute_mnesia_transaction(QueryThunk)].
+
+%% Streaming variant of list_permissions/2 (see list_users/2).
+list_permissions(Keys, QueryThunk, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map(
+      AggregatorPid, Ref, fun(U) -> extract_user_permission_params(Keys, U) end,
+      rabbit_misc:execute_mnesia_transaction(QueryThunk)).
+
+%% Keeps only the proplist entries whose key appears in Keys.
+filter_props(Keys, Props) -> [T || T = {K, _} <- Props, lists:member(K, Keys)].
+
+-spec list_user_permissions
+        (rabbit_types:username()) -> [rabbit_types:infos()].
+
+%% Lists one user's permissions across all vhosts; rabbit_misc:with_user
+%% makes the transaction fail if the user does not exist.
+list_user_permissions(Username) ->
+    list_permissions(
+      user_perms_info_keys(),
+      rabbit_misc:with_user(Username, match_user_vhost(Username, '_'))).
+
+-spec list_user_permissions
+        (rabbit_types:username(), reference(), pid()) -> 'ok'.
+
+list_user_permissions(Username, Ref, AggregatorPid) ->
+    list_permissions(
+      user_perms_info_keys(),
+      rabbit_misc:with_user(Username, match_user_vhost(Username, '_')),
+      Ref, AggregatorPid).
+
+-spec list_vhost_permissions
+        (rabbit_types:vhost()) -> [rabbit_types:infos()].
+
+%% Lists all users' permissions on one vhost; rabbit_vhost:with makes
+%% the transaction fail if the vhost does not exist.
+list_vhost_permissions(VHostPath) ->
+    list_permissions(
+      vhost_perms_info_keys(),
+      rabbit_vhost:with(VHostPath, match_user_vhost('_', VHostPath))).
+
+-spec list_vhost_permissions
+        (rabbit_types:vhost(), reference(), pid()) -> 'ok'.
+
+list_vhost_permissions(VHostPath, Ref, AggregatorPid) ->
+    list_permissions(
+      vhost_perms_info_keys(),
+      rabbit_vhost:with(VHostPath, match_user_vhost('_', VHostPath)),
+      Ref, AggregatorPid).
+
+-spec list_user_vhost_permissions
+        (rabbit_types:username(), rabbit_types:vhost()) -> [rabbit_types:infos()].
+
+%% At most one permission record exists per user/vhost pair, so this
+%% returns zero or one infos().
+list_user_vhost_permissions(Username, VHostPath) ->
+    list_permissions(
+      user_vhost_perms_info_keys(),
+      rabbit_vhost:with_user_and_vhost(
+        Username, VHostPath, match_user_vhost(Username, VHostPath))).
+
+%% Flattens a #user_permission{} record into a proplist restricted to
+%% the requested Keys.
+extract_user_permission_params(Keys, #user_permission{
+                                        user_vhost =
+                                            #user_vhost{username     = Username,
+                                                        virtual_host = VHostPath},
+                                        permission = #permission{
+                                                        configure = ConfigurePerm,
+                                                        write     = WritePerm,
+                                                        read      = ReadPerm}}) ->
+    filter_props(Keys, [{user,      Username},
+                        {vhost,     VHostPath},
+                        {configure, ConfigurePerm},
+                        {write,     WritePerm},
+                        {read,      ReadPerm}]).
+
+extract_internal_user_params(User) ->
+    [{user, internal_user:get_username(User)},
+     {tags, internal_user:get_tags(User)}].
+
+%% Returns a transaction thunk matching permission records; '_' acts as
+%% a wildcard for either the username or the vhost.
+match_user_vhost(Username, VHostPath) ->
+    fun () -> mnesia:match_object(
+                rabbit_user_permission,
+                #user_permission{user_vhost = #user_vhost{
+                                                 username     = Username,
+                                                 virtual_host = VHostPath},
+                                 permission = '_'},
+                read)
+    end.
+
+%% Topic permission listing mirrors the regular permission listing
+%% above, keyed additionally by exchange.
+list_topic_permissions() ->
+    list_topic_permissions(topic_perms_info_keys(), match_user_vhost_topic_permission('_', '_')).
+
+list_user_topic_permissions(Username) ->
+    list_topic_permissions(user_topic_perms_info_keys(),
+        rabbit_misc:with_user(Username, match_user_vhost_topic_permission(Username, '_'))).
+
+list_vhost_topic_permissions(VHost) ->
+    list_topic_permissions(vhost_topic_perms_info_keys(),
+        rabbit_vhost:with(VHost, match_user_vhost_topic_permission('_', VHost))).
+
+list_user_vhost_topic_permissions(Username, VHost) ->
+    list_topic_permissions(user_vhost_topic_perms_info_keys(),
+        rabbit_vhost:with_user_and_vhost(Username, VHost, match_user_vhost_topic_permission(Username, VHost))).
+
+%% QueryThunk runs inside an Mnesia transaction and yields
+%% #topic_permission{} records.
+list_topic_permissions(Keys, QueryThunk) ->
+    [extract_topic_permission_params(Keys, U) ||
+        U <- rabbit_misc:execute_mnesia_transaction(QueryThunk)].
+
+match_user_vhost_topic_permission(Username, VHostPath) ->
+    match_user_vhost_topic_permission(Username, VHostPath, '_').
+
+%% Returns a transaction thunk; '_' is a wildcard for any of the three
+%% key components.
+match_user_vhost_topic_permission(Username, VHostPath, Exchange) ->
+    fun () -> mnesia:match_object(
+                rabbit_topic_permission,
+                #topic_permission{topic_permission_key = #topic_permission_key{
+                                                            user_vhost = #user_vhost{
+                                                                            username     = Username,
+                                                                            virtual_host = VHostPath},
+                                                            exchange = Exchange},
+                                  permission = '_'},
+                read)
+    end.
+
+%% Flattens a #topic_permission{} record into a proplist restricted to
+%% the requested Keys. Note topic permissions carry no `configure`
+%% component.
+extract_topic_permission_params(Keys, #topic_permission{
+                                         topic_permission_key = #topic_permission_key{
+                                                                   user_vhost = #user_vhost{username = Username,
+                                                                                            virtual_host = VHostPath},
+                                                                   exchange = Exchange},
+                                         permission = #permission{
+                                                         write = WritePerm,
+                                                         read  = ReadPerm}}) ->
+    filter_props(Keys, [{user,     Username},
+                        {vhost,    VHostPath},
+                        {exchange, Exchange},
+                        {write,    WritePerm},
+                        {read,     ReadPerm}]).
+
+%% Resolves the password-hashing module for an imported user definition.
+%% Prefers the algorithm recorded in the definition itself; otherwise
+%% falls back based on the RabbitMQ version that exported it (pre-3.6
+%% used MD5, 3.6.0 used SHA-256 without exporting the algorithm).
+hashing_algorithm(User, Version) ->
+    case maps:get(hashing_algorithm, User, undefined) of
+        undefined ->
+            case Version of
+                %% 3.6.1 and later versions are supposed to have
+                %% the algorithm exported and thus not need a default
+                <<"3.6.0">> -> rabbit_password_hashing_sha256;
+                <<"3.5.", _/binary>> -> rabbit_password_hashing_md5;
+                <<"3.4.", _/binary>> -> rabbit_password_hashing_md5;
+                <<"3.3.", _/binary>> -> rabbit_password_hashing_md5;
+                <<"3.2.", _/binary>> -> rabbit_password_hashing_md5;
+                <<"3.1.", _/binary>> -> rabbit_password_hashing_md5;
+                <<"3.0.", _/binary>> -> rabbit_password_hashing_md5;
+                _ -> rabbit_password:hashing_mod()
+            end;
+        Alg -> rabbit_data_coercion:to_atom(Alg, utf8)
+    end.
+
+%% Returns false, or {true, Limit} when the user's tracked connection
+%% count has reached its max-connections limit.
+is_over_connection_limit(Username) ->
+    Fun = fun() ->
+              rabbit_connection_tracking:count_tracked_items_in({user, Username})
+          end,
+    is_over_limit(Username, <<"max-connections">>, Fun).
+
+%% As above, for channels against max-channels.
+is_over_channel_limit(Username) ->
+    Fun = fun() ->
+              rabbit_channel_tracking:count_tracked_items_in({user, Username})
+          end,
+    is_over_limit(Username, <<"max-channels">>, Fun).
+
+%% Fun is only invoked when a limit is actually configured; a limit of 0
+%% short-circuits to "over the limit" without counting.
+is_over_limit(Username, LimitType, Fun) ->
+    case get_user_limit(Username, LimitType) of
+        undefined -> false;
+        {ok, 0} -> {true, 0};
+        {ok, Limit} ->
+            case Fun() >= Limit of
+                false -> false;
+                true -> {true, Limit}
+            end
+    end.
+
+%% A negative configured value is treated the same as no limit at all.
+get_user_limit(Username, LimitType) ->
+    case lookup_user(Username) of
+        {ok, User} ->
+            case rabbit_misc:pget(LimitType, internal_user:get_limits(User)) of
+                undefined -> undefined;
+                N when N < 0 -> undefined;
+                N when N >= 0 -> {ok, N}
+            end;
+        _ ->
+            undefined
+    end.
+
+%% All users that have at least one limit configured.
+get_user_limits() ->
+    [{internal_user:get_username(U), internal_user:get_limits(U)} ||
+        U <- all_users(),
+        internal_user:get_limits(U) =/= #{}].
+
+get_user_limits(Username) ->
+    case lookup_user(Username) of
+        {ok, User} -> internal_user:get_limits(User);
+        _ -> undefined
+    end.
+
+%% Event emission for limit changes; consumed by the management plugin
+%% and audit tooling.
+notify_limit_set(Username, ActingUser, Term) ->
+    rabbit_event:notify(user_limits_set,
+                        [{name, <<"limits">>}, {user_who_performed_action, ActingUser},
+                         {username, Username} | maps:to_list(Term)]).
+
+notify_limit_clear(Username, ActingUser) ->
+    rabbit_event:notify(user_limits_cleared,
+                        [{name, <<"limits">>}, {user_who_performed_action, ActingUser},
+                         {username, Username}]).
diff --git a/deps/rabbit/src/rabbit_auth_mechanism_amqplain.erl b/deps/rabbit/src/rabbit_auth_mechanism_amqplain.erl
new file mode 100644
index 0000000000..c81a337153
--- /dev/null
+++ b/deps/rabbit/src/rabbit_auth_mechanism_amqplain.erl
@@ -0,0 +1,54 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_auth_mechanism_amqplain).
+-include("rabbit.hrl").
+
+-behaviour(rabbit_auth_mechanism).
+
+-export([description/0, should_offer/1, init/1, handle_response/2]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "auth mechanism amqplain"},
+ {mfa, {rabbit_registry, register,
+ [auth_mechanism, <<"AMQPLAIN">>, ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+%% AMQPLAIN, as used by Qpid Python test suite. The 0-8 spec actually
+%% defines this as PLAIN, but in 0-9 that definition is gone, instead
+%% referring generically to "SASL security mechanism", i.e. the above.
+
+%% rabbit_auth_mechanism callback: human-readable mechanism metadata.
+description() ->
+    [{description, <<"QPid AMQPLAIN mechanism">>}].
+
+%% AMQPLAIN is offered on every connection, regardless of socket type.
+should_offer(_Sock) ->
+    true.
+
+%% The mechanism is stateless; handle_response/2 ignores this state.
+init(_Sock) ->
+    [].
+
+%% Parenthesized so the macro stays a single boolean term after token
+%% substitution; without the parens, combining two expansions with
+%% `andalso` would regroup ("andalso" binds tighter than "orelse").
+-define(IS_STRING_TYPE(Type), (Type =:= longstr orelse Type =:= shortstr)).
+
+%% rabbit_auth_mechanism callback. Response is a binary-encoded AMQP
+%% field table that must contain LOGIN and PASSWORD entries, both of a
+%% string type (longstr/shortstr).
+%%
+%% The two type checks are joined with `andalso`: with the previous `;`
+%% (guard OR) a pair where only ONE field was a string type matched this
+%% clause, passing a non-binary value to check_user_pass_login instead
+%% of reaching the "unsupported type" error clause below.
+handle_response(Response, _State) ->
+    LoginTable = rabbit_binary_parser:parse_table(Response),
+    case {lists:keysearch(<<"LOGIN">>, 1, LoginTable),
+          lists:keysearch(<<"PASSWORD">>, 1, LoginTable)} of
+        {{value, {_, UserType, User}},
+         {value, {_, PassType, Pass}}} when ?IS_STRING_TYPE(UserType) andalso
+                                            ?IS_STRING_TYPE(PassType) ->
+            rabbit_access_control:check_user_pass_login(User, Pass);
+        {{value, {_, _UserType, _User}},
+         {value, {_, _PassType, _Pass}}} ->
+            %% Both fields present, but at least one has a non-string type.
+            {protocol_error,
+             "AMQPLAIN auth info ~w uses unsupported type for LOGIN or PASSWORD field",
+             [LoginTable]};
+        _ ->
+            {protocol_error,
+             "AMQPLAIN auth info ~w is missing LOGIN or PASSWORD field",
+             [LoginTable]}
+    end.
diff --git a/deps/rabbit/src/rabbit_auth_mechanism_cr_demo.erl b/deps/rabbit/src/rabbit_auth_mechanism_cr_demo.erl
new file mode 100644
index 0000000000..15439c461f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_auth_mechanism_cr_demo.erl
@@ -0,0 +1,48 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_auth_mechanism_cr_demo).
+-include("rabbit.hrl").
+
+-behaviour(rabbit_auth_mechanism).
+
+-export([description/0, should_offer/1, init/1, handle_response/2]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "auth mechanism cr-demo"},
+ {mfa, {rabbit_registry, register,
+ [auth_mechanism, <<"RABBIT-CR-DEMO">>,
+ ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+-record(state, {username = undefined}).
+
+%% Provides equivalent security to PLAIN but demos use of Connection.Secure(Ok)
+%% START-OK: Username
+%% SECURE: "Please tell me your password"
+%% SECURE-OK: "My password is ~s", [Password]
+
+%% rabbit_auth_mechanism callback: human-readable mechanism metadata.
+description() ->
+    [{description, <<"RabbitMQ Demo challenge-response authentication "
+                     "mechanism">>}].
+
+%% Offered on every connection.
+should_offer(_Sock) ->
+    true.
+
+%% State tracks whether we have already received the username.
+init(_Sock) ->
+    #state{}.
+
+%% Clause order matters: while username is still `undefined`, ANY
+%% response (including one starting with "My password is ") is taken to
+%% be the username and answered with the password challenge.
+handle_response(Response, State = #state{username = undefined}) ->
+    {challenge, <<"Please tell me your password">>,
+     State#state{username = Response}};
+
+%% Second round: the response must carry the literal prefix; anything
+%% else falls through to the protocol error below.
+handle_response(<<"My password is ", Password/binary>>,
+                #state{username = Username}) ->
+    rabbit_access_control:check_user_pass_login(Username, Password);
+handle_response(Response, _State) ->
+    {protocol_error, "Invalid response '~s'", [Response]}.
diff --git a/deps/rabbit/src/rabbit_auth_mechanism_plain.erl b/deps/rabbit/src/rabbit_auth_mechanism_plain.erl
new file mode 100644
index 0000000000..d704c72400
--- /dev/null
+++ b/deps/rabbit/src/rabbit_auth_mechanism_plain.erl
@@ -0,0 +1,60 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_auth_mechanism_plain).
+-include("rabbit.hrl").
+
+-behaviour(rabbit_auth_mechanism).
+
+-export([description/0, should_offer/1, init/1, handle_response/2]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "auth mechanism plain"},
+ {mfa, {rabbit_registry, register,
+ [auth_mechanism, <<"PLAIN">>, ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+%% SASL PLAIN, as used by the Qpid Java client and our clients. Also,
+%% apparently, by OpenAMQ.
+
+%% rabbit_auth_mechanism callback: human-readable mechanism metadata.
+description() ->
+    [{description, <<"SASL PLAIN authentication mechanism">>}].
+
+%% Offered on every connection.
+should_offer(_Sock) ->
+    true.
+
+%% The mechanism is stateless.
+init(_Sock) ->
+    [].
+
+%% Response is the SASL PLAIN message: authzid NUL authcid NUL passwd.
+%% Only an empty authzid is accepted here (the message must start with
+%% a NUL byte, see extract_elem/1).
+handle_response(Response, _State) ->
+    case extract_user_pass(Response) of
+        {ok, User, Pass} ->
+            rabbit_access_control:check_user_pass_login(User, Pass);
+        error ->
+            {protocol_error, "response ~p invalid", [Response]}
+    end.
+
+%% Splits <<0, User/…, 0, Pass/…>> into {ok, User, Pass}; the password
+%% must be the final element (nothing may follow it).
+extract_user_pass(Response) ->
+    case extract_elem(Response) of
+        {ok, User, Response1} -> case extract_elem(Response1) of
+                                     {ok, Pass, <<>>} -> {ok, User, Pass};
+                                     _                -> error
+                                 end;
+        error                 -> error
+    end.
+
+%% Consumes a leading NUL, then everything up to (but not including) the
+%% next NUL — or to the end of input if there is none.
+extract_elem(<<0:8, Rest/binary>>) ->
+    Count = next_null_pos(Rest, 0),
+    <<Elem:Count/binary, Rest1/binary>> = Rest,
+    {ok, Elem, Rest1};
+extract_elem(_) ->
+    error.
+
+%% Index of the next NUL byte, or the binary's length if none remains.
+next_null_pos(<<>>, Count)                  -> Count;
+next_null_pos(<<0:8, _Rest/binary>>, Count) -> Count;
+next_null_pos(<<_:8,  Rest/binary>>, Count) -> next_null_pos(Rest, Count + 1).
diff --git a/deps/rabbit/src/rabbit_autoheal.erl b/deps/rabbit/src/rabbit_autoheal.erl
new file mode 100644
index 0000000000..6380d71895
--- /dev/null
+++ b/deps/rabbit/src/rabbit_autoheal.erl
@@ -0,0 +1,456 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_autoheal).
+
+-export([init/0, enabled/0, maybe_start/1, rabbit_down/2, node_down/2,
+ handle_msg/3, process_down/2]).
+
+%% The named process we are running in.
+-define(SERVER, rabbit_node_monitor).
+
+-define(MNESIA_STOPPED_PING_INTERNAL, 200).
+
+-define(AUTOHEAL_STATE_AFTER_RESTART, rabbit_autoheal_state_after_restart).
+
+%%----------------------------------------------------------------------------
+
+%% In order to autoheal we want to:
+%%
+%% * Find the winning partition
+%% * Stop all nodes in other partitions
+%% * Wait for them all to be stopped
+%% * Start them again
+%%
+%% To keep things simple, we assume all nodes are up. We don't start
+%% unless all nodes are up, and if a node goes down we abandon the
+%% whole process. To further keep things simple we also defer the
+%% decision as to the winning node to the "leader" - arbitrarily
+%% selected as the first node in the cluster.
+%%
+%% To coordinate the restarting nodes we pick a special node from the
+%% winning partition - the "winner". Restarting nodes then stop, and
+%% wait for it to tell them it is safe to start again. The winner
+%% determines that a node has stopped just by seeing if its rabbit app
+%% stops - if a node stops for any other reason it just gets a message
+%% it will ignore, and otherwise we carry on.
+%%
+%% Meanwhile, the leader may continue to receive new autoheal requests:
+%% all of them are ignored. The winner notifies the leader when the
+%% current autoheal process is finished (ie. when all losers stopped and
+%% were asked to start again) or was aborted. When the leader receives
+%% the notification or if it looses contact with the winner, it can
+%% accept new autoheal requests.
+%%
+%% The winner and the leader are not necessarily the same node.
+%%
+%% The leader can be a loser and will restart in this case. It remembers
+%% there is an autoheal in progress by temporarily saving the autoheal
+%% state to the application environment.
+%%
+%% == Possible states ==
+%%
+%% not_healing
+%% - the default
+%%
+%% {winner_waiting, OutstandingStops, Notify}
+%% - we are the winner and are waiting for all losing nodes to stop
+%% before telling them they can restart
+%%
+%% {leader_waiting, Winner, Notify}
+%% - we are the leader, and have already assigned the winner and losers.
+%% We are waiting for a confirmation from the winner that the autoheal
+%% process has ended. Meanwhile we can ignore autoheal requests.
+%% Because we may be a loser too, this state is saved to the application
+%% environment and restored on startup.
+%%
+%% restarting
+%% - we are restarting. Of course the node monitor immediately dies
+%% then so this state does not last long. We therefore send the
+%% autoheal_safe_to_start message to the rabbit_outside_app_process
+%% instead.
+%%
+%% == Message flow ==
+%%
+%% 1. Any node (leader included) >> {request_start, node()} >> Leader
+%% When Mnesia detects it is running partitioned or
+%% when a remote node starts, rabbit_node_monitor calls
+%% rabbit_autoheal:maybe_start/1. The message above is sent to the
+%% leader so the leader can take a decision.
+%%
+%% 2. Leader >> {become_winner, Losers} >> Winner
+%% The leader notifies the winner so the latter can proceed with
+%% the autoheal.
+%%
+%% 3. Winner >> {winner_is, Winner} >> All losers
+%% The winner notifies losers they must stop.
+%%
+%% 4. Winner >> autoheal_safe_to_start >> All losers
+%% When either all losers stopped or the autoheal process was
+%% aborted, the winner notifies losers they can start again.
+%%
+%% 5. Leader >> report_autoheal_status >> Winner
+%% The leader asks the autoheal status to the winner. This only
+%% happens when the leader is a loser too. If this is not the case,
+%% this message is never sent.
+%%
+%% 6. Winner >> {autoheal_finished, Winner} >> Leader
+%% The winner notifies the leader that the autoheal process was
+%% either finished or aborted (ie. autoheal_safe_to_start was sent
+%% to losers).
+
+%%----------------------------------------------------------------------------
+
+%% Called by rabbit_node_monitor on startup. Restores any autoheal
+%% state saved across a restart and, when this (leader) node restarted
+%% mid-heal, asks the winner for a progress report.
+init() ->
+    %% We check the application environment for a saved autoheal state
+    %% saved during a restart. If this node is a leader, it is used
+    %% to determine if it needs to ask the winner to report about the
+    %% autoheal progress.
+    State = case application:get_env(rabbit, ?AUTOHEAL_STATE_AFTER_RESTART) of
+                {ok, S} -> S;
+                undefined -> not_healing
+            end,
+    ok = application:unset_env(rabbit, ?AUTOHEAL_STATE_AFTER_RESTART),
+    case State of
+        {leader_waiting, Winner, _} ->
+            rabbit_log:info(
+              "Autoheal: in progress, requesting report from ~p~n", [Winner]),
+            send(Winner, report_autoheal_status);
+        _ ->
+            ok
+    end,
+    State.
+
+%% Kicks off an autoheal round by asking the leader to decide, but only
+%% from the idle state and only when autoheal is configured. Any other
+%% state is returned unchanged (a heal is already in flight).
+maybe_start(not_healing) ->
+    case enabled() of
+        true  -> Leader = leader(),
+                 send(Leader, {request_start, node()}),
+                 rabbit_log:info("Autoheal request sent to ~p~n", [Leader]),
+                 not_healing;
+        false -> not_healing
+    end;
+maybe_start(State) ->
+    State.
+
+%% Autoheal is active for partition handling modes `autoheal` and
+%% `{pause_if_all_down, _, autoheal}`.
+enabled() ->
+    case application:get_env(rabbit, cluster_partition_handling) of
+        {ok, autoheal}                         -> true;
+        {ok, {pause_if_all_down, _, autoheal}} -> true;
+        _                                      -> false
+    end.
+
+%% The leader is (arbitrarily) the first node in sorted cluster order.
+leader() ->
+    [Leader | _] = lists:usort(rabbit_mnesia:cluster_nodes(all)),
+    Leader.
+
+%% Invoked when a node's rabbit application stops.
+%%
+%% This is the winner receiving its last notification that a node has
+%% stopped - all nodes can now start again
+rabbit_down(Node, {winner_waiting, [Node], Notify}) ->
+    rabbit_log:info("Autoheal: final node has stopped, starting...~n",[]),
+    winner_finish(Notify);
+
+%% Winner: one more loser has stopped; keep waiting for the rest.
+rabbit_down(Node, {winner_waiting, WaitFor, Notify}) ->
+    {winner_waiting, WaitFor -- [Node], Notify};
+
+%% Leader: the winner's rabbit app stopped - abort and release losers.
+rabbit_down(Winner, {leader_waiting, Winner, Losers}) ->
+    abort([Winner], Losers);
+
+rabbit_down(_Node, State) ->
+    %% Ignore. Either:
+    %% o  we already cancelled the autoheal process;
+    %% o  we are still waiting the winner's report.
+    State.
+
+%% Invoked when a node becomes unreachable entirely (not just its
+%% rabbit application).
+node_down(_Node, not_healing) ->
+    not_healing;
+
+node_down(Node, {winner_waiting, _, Notify}) ->
+    abort([Node], Notify);
+
+node_down(Node, {leader_waiting, Node, _Notify}) ->
+    %% The winner went down, we don't know what to do so we simply abort.
+    rabbit_log:info("Autoheal: aborting - winner ~p went down~n", [Node]),
+    not_healing;
+
+node_down(Node, {leader_waiting, _, _} = St) ->
+    %% If it is a partial partition, the winner might continue with the
+    %% healing process. If it is a full partition, the winner will also
+    %% see it and abort. Let's wait for it.
+    rabbit_log:info("Autoheal: ~p went down, waiting for winner decision ~n", [Node]),
+    St;
+
+node_down(Node, _State) ->
+    rabbit_log:info("Autoheal: aborting - ~p went down~n", [Node]),
+    not_healing.
+
+%% If the process that has to restart the node crashes for an unexpected reason,
+%% we go back to a not healing state so the node is able to recover.
+process_down({'EXIT', Pid, Reason}, {restarting, Pid}) when Reason =/= normal ->
+    rabbit_log:info("Autoheal: aborting - the process responsible for restarting the "
+                    "node terminated with reason: ~p~n", [Reason]),
+    not_healing;
+
+process_down(_, State) ->
+    State.
+
+%% handle_msg/3 is the autoheal protocol state machine: called by
+%% rabbit_node_monitor with an autoheal message, the current state and
+%% the locally observed partitions; returns the next state. See the
+%% "Message flow" overview at the top of this module.
+%%
+%% By receiving this message we become the leader
+%% TODO should we try to debounce this?
+handle_msg({request_start, Node},
+           not_healing, Partitions) ->
+    rabbit_log:info("Autoheal request received from ~p~n", [Node]),
+    case check_other_nodes(Partitions) of
+        {error, E} ->
+            rabbit_log:info("Autoheal request denied: ~s~n", [fmt_error(E)]),
+            not_healing;
+        {ok, AllPartitions} ->
+            {Winner, Losers} = make_decision(AllPartitions),
+            rabbit_log:info("Autoheal decision~n"
+                            "  * Partitions: ~p~n"
+                            "  * Winner:     ~p~n"
+                            "  * Losers:     ~p~n",
+                            [AllPartitions, Winner, Losers]),
+            case node() =:= Winner of
+                true  -> handle_msg({become_winner, Losers},
+                                    not_healing, Partitions);
+                false -> send(Winner, {become_winner, Losers}),
+                         {leader_waiting, Winner, Losers}
+            end
+    end;
+
+handle_msg({request_start, Node},
+           State, _Partitions) ->
+    rabbit_log:info("Autoheal request received from ~p when healing; "
+                    "ignoring~n", [Node]),
+    State;
+
+handle_msg({become_winner, Losers},
+           not_healing, _Partitions) ->
+    rabbit_log:info("Autoheal: I am the winner, waiting for ~p to stop~n",
+                    [Losers]),
+    stop_partition(Losers);
+
+handle_msg({become_winner, Losers},
+           {winner_waiting, _, Losers}, _Partitions) ->
+    %% The leader has aborted the healing, might have seen us down but
+    %% we didn't see the same. Let's try again as it is the same partition.
+    rabbit_log:info("Autoheal: I am the winner and received a duplicated "
+                    "request, waiting again for ~p to stop~n", [Losers]),
+    stop_partition(Losers);
+
+handle_msg({become_winner, _},
+           {winner_waiting, _, Losers}, _Partitions) ->
+    %% Something has happened to the leader, it might have seen us down but we
+    %% are still alive. Partitions have changed, cannot continue.
+    rabbit_log:info("Autoheal: I am the winner and received another healing "
+                    "request, partitions have changed to ~p. Aborting ~n", [Losers]),
+    winner_finish(Losers),
+    not_healing;
+
+handle_msg({winner_is, Winner}, State = not_healing,
+           _Partitions) ->
+    %% This node is a loser, nothing else.
+    Pid = restart_loser(State, Winner),
+    {restarting, Pid};
+handle_msg({winner_is, Winner}, State = {leader_waiting, Winner, _},
+           _Partitions) ->
+    %% This node is the leader and a loser at the same time.
+    Pid = restart_loser(State, Winner),
+    {restarting, Pid};
+
+handle_msg(Request, {restarting, Pid} = St, _Partitions) ->
+    %% ignore, we can contribute no further
+    rabbit_log:info("Autoheal: Received the request ~p while waiting for ~p "
+                    "to restart the node. Ignoring it ~n", [Request, Pid]),
+    St;
+
+handle_msg(report_autoheal_status, not_healing, _Partitions) ->
+    %% The leader is asking about the autoheal status to us (the
+    %% winner). This happens when the leader is a loser and it just
+    %% restarted. We are in the "not_healing" state, so the previous
+    %% autoheal process ended: let's tell this to the leader.
+    send(leader(), {autoheal_finished, node()}),
+    not_healing;
+
+handle_msg(report_autoheal_status, State, _Partitions) ->
+    %% Like above, the leader is asking about the autoheal status. We
+    %% are not finished with it. There is no need to send anything yet
+    %% to the leader: we will send the notification when it is over.
+    State;
+
+handle_msg({autoheal_finished, Winner},
+           {leader_waiting, Winner, _}, _Partitions) ->
+    %% The winner is finished with the autoheal process and notified us
+    %% (the leader). We can transition to the "not_healing" state and
+    %% accept new requests.
+    rabbit_log:info("Autoheal finished according to winner ~p~n", [Winner]),
+    not_healing;
+
+handle_msg({autoheal_finished, Winner}, not_healing, _Partitions)
+  when Winner =:= node() ->
+    %% We are the leader and the winner. The state already transitioned
+    %% to "not_healing" at the end of the autoheal process.
+    rabbit_log:info("Autoheal finished according to winner ~p~n", [node()]),
+    not_healing;
+
+handle_msg({autoheal_finished, Winner}, not_healing, _Partitions) ->
+    %% We might have seen the winner down during a partial partition and
+    %% transitioned to not_healing. However, the winner was still able
+    %% to finish. Let it pass.
+    rabbit_log:info("Autoheal finished according to winner ~p."
+                    " Unexpected, I might have previously seen the winner down~n", [Winner]),
+    not_healing.
+
+%%----------------------------------------------------------------------------
+
+%% All autoheal messages travel via the rabbit_node_monitor process on
+%% the target node, tagged so it can route them back into this module.
+send(Node, Msg) -> {?SERVER, Node} ! {autoheal_msg, Msg}.
+
+abort(Down, Notify) ->
+    rabbit_log:info("Autoheal: aborting - ~p down~n", [Down]),
+    %% Make sure any nodes waiting for us start - it won't necessarily
+    %% heal the partition but at least they won't get stuck.
+    %% If we are executing this, we are not stopping. Thus, don't wait
+    %% for ourselves!
+    winner_finish(Notify -- [node()]).
+
+%% Releases the losers (safe-to-start), notifies the leader, and
+%% returns the idle state.
+winner_finish(Notify) ->
+    %% There is a race in Mnesia causing a starting loser to hang
+    %% forever if another loser stops at the same time: the starting
+    %% node connects to the other node, negotiates the protocol and
+    %% attempts to acquire a write lock on the schema on the other node.
+    %% If the other node stops between the protocol negotiation and lock
+    %% request, the starting node never gets an answer to its lock
+    %% request.
+    %%
+    %% To work around the problem, we make sure Mnesia is stopped on all
+    %% losing nodes before sending the "autoheal_safe_to_start" signal.
+    wait_for_mnesia_shutdown(Notify),
+    [{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify],
+    send(leader(), {autoheal_finished, node()}),
+    not_healing.
+
+%% This improves on the previous implementation, but could still
+%% potentially loop forever. It is also possible that by the time it
+%% finishes some of the nodes have been manually restarted, but we can't
+%% do much more (apart from stopping them again). So let it continue and
+%% notify all the losers to restart.
+wait_for_mnesia_shutdown(AllNodes) ->
+    Monitors = lists:foldl(fun(Node, Monitors0) ->
+                                   pmon:monitor({mnesia_sup, Node}, Monitors0)
+                           end, pmon:new(), AllNodes),
+    wait_for_supervisors(Monitors).
+
+%% Blocks until every monitored remote mnesia_sup is down; every 60s
+%% re-sends the winner notification to losers whose Mnesia is still up
+%% (they may have missed the first {winner_is, _}).
+wait_for_supervisors(Monitors) ->
+    case pmon:is_empty(Monitors) of
+        true ->
+            ok;
+        false ->
+            receive
+                {'DOWN', _MRef, process, {mnesia_sup, _} = I, _Reason} ->
+                    wait_for_supervisors(pmon:erase(I, Monitors))
+            after
+                60000 ->
+                    AliveLosers = [Node || {_, Node} <- pmon:monitored(Monitors)],
+                    rabbit_log:info("Autoheal: mnesia in nodes ~p is still up, sending "
+                                    "winner notification again to these ~n", [AliveLosers]),
+                    [send(L, {winner_is, node()}) || L <- AliveLosers],
+                    wait_for_mnesia_shutdown(AliveLosers)
+            end
+    end.
+
+%% Stops the local (losing) node and restarts it once the winner says
+%% it is safe, all from a process outside the rabbit application (the
+%% node monitor dies with the app). Returns the pid of that process.
+restart_loser(State, Winner) ->
+    rabbit_log:warning(
+      "Autoheal: we were selected to restart; winner is ~p~n", [Winner]),
+    NextStateTimeout = application:get_env(rabbit, autoheal_state_transition_timeout, 60000),
+    rabbit_node_monitor:run_outside_applications(
+      fun () ->
+              MRef = erlang:monitor(process, {?SERVER, Winner}),
+              rabbit:stop(),
+              %% Three ways out: the winner dies (give up -> not_healing),
+              %% the winner releases us (keep State), or we time out
+              %% waiting (give up -> not_healing).
+              NextState = receive
+                  {'DOWN', MRef, process, {?SERVER, Winner}, _Reason} ->
+                      not_healing;
+                  autoheal_safe_to_start ->
+                      State
+                  after NextStateTimeout ->
+                      rabbit_log:warning(
+                        "Autoheal: timed out waiting for a safe-to-start message from the winner (~p); will retry",
+                        [Winner]),
+                      not_healing
+              end,
+              erlang:demonitor(MRef, [flush]),
+              %% During the restart, the autoheal state is lost so we
+              %% store it in the application environment temporarily so
+              %% init/0 can pick it up.
+              %%
+              %% This is useful to the leader which is a loser at the
+              %% same time: because the leader is restarting, there
+              %% is a great chance it misses the "autoheal finished!"
+              %% notification from the winner. Thanks to the saved
+              %% state, it knows it needs to ask the winner if the
+              %% autoheal process is finished or not.
+              application:set_env(rabbit,
+                                  ?AUTOHEAL_STATE_AFTER_RESTART, NextState),
+              rabbit:start()
+      end, true).
+
+%% Picks the winning partition: the one with the most client
+%% connections, ties broken by partition size (see partition_value/1);
+%% its first member becomes the winner, everyone else loses.
+make_decision(AllPartitions) ->
+    Sorted = lists:sort([{partition_value(P), P} || P <- AllPartitions]),
+    [[Winner | _] | Rest] = lists:reverse([P || {_, P} <- Sorted]),
+    {Winner, lists:append(Rest)}.
+
+%% Sort key for a partition: {TotalConnectionCount, NodeCount}. rpc
+%% failures (non-list results) are simply excluded from the count.
+partition_value(Partition) ->
+    Connections = [Res || Node <- Partition,
+                          Res <- [rpc:call(Node, rabbit_networking,
+                                           connections_local, [])],
+                          is_list(Res)],
+    {length(lists:append(Connections)), length(Partition)}.
+
+%% We have our local understanding of what partitions exist; but we
+%% only know which nodes we have been partitioned from, not which
+%% nodes are partitioned from each other.
+%%
+%% Queries every other cluster node for its partition view; only
+%% proceeds ({ok, AllPartitions}) when all nodes responded and none of
+%% them reports a node as down.
+check_other_nodes(LocalPartitions) ->
+    Nodes = rabbit_mnesia:cluster_nodes(all),
+    {Results, Bad} = rabbit_node_monitor:status(Nodes -- [node()]),
+    RemotePartitions = [{Node, proplists:get_value(partitions, Res)}
+                        || {Node, Res} <- Results],
+    RemoteDown = [{Node, Down}
+                  || {Node, Res} <- Results,
+                     Down <- [Nodes -- proplists:get_value(nodes, Res)],
+                     Down =/= []],
+    case {Bad, RemoteDown} of
+        {[], []} -> Partitions = [{node(), LocalPartitions} | RemotePartitions],
+                    {ok, all_partitions(Partitions, [Nodes])};
+        {[], _}  -> {error, {remote_down, RemoteDown}};
+        {_, _}   -> {error, {nodes_down, Bad}}
+    end.
+
+%% Refines an initial single partition (all nodes) by repeatedly
+%% splitting the group containing Node into "can see" / "cannot see"
+%% halves, per each node's reported partition list.
+all_partitions([], Partitions) ->
+    Partitions;
+all_partitions([{Node, CantSee} | Rest], Partitions) ->
+    {[Containing], Others} =
+        lists:partition(fun (Part) -> lists:member(Node, Part) end, Partitions),
+    A = Containing -- CantSee,
+    B = Containing -- A,
+    Partitions1 = case {A, B} of
+                      {[], _} -> Partitions;
+                      {_, []} -> Partitions;
+                      _       -> [A, B | Others]
+                  end,
+    all_partitions(Rest, Partitions1).
+
+fmt_error({remote_down, RemoteDown}) ->
+    rabbit_misc:format("Remote nodes disconnected:~n ~p", [RemoteDown]);
+fmt_error({nodes_down, NodesDown}) ->
+    rabbit_misc:format("Local nodes down: ~p", [NodesDown]).
+
+%% Winner side: verify the losers are still running before telling them
+%% to stop; if any already died, abort (and release the rest).
+stop_partition(Losers) ->
+    %% The leader said everything was ready - do we agree? If not then
+    %% give up.
+    Down = Losers -- rabbit_node_monitor:alive_rabbit_nodes(Losers),
+    case Down of
+        [] -> [send(L, {winner_is, node()}) || L <- Losers],
+              {winner_waiting, Losers, Losers};
+        _  -> abort(Down, Losers)
+    end.
diff --git a/deps/rabbit/src/rabbit_backing_queue.erl b/deps/rabbit/src/rabbit_backing_queue.erl
new file mode 100644
index 0000000000..4d709e14d0
--- /dev/null
+++ b/deps/rabbit/src/rabbit_backing_queue.erl
@@ -0,0 +1,264 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_backing_queue).
+
+-export([info_keys/0]).
+
+-define(INFO_KEYS, [messages_ram, messages_ready_ram,
+ messages_unacknowledged_ram, messages_persistent,
+ message_bytes, message_bytes_ready,
+ message_bytes_unacknowledged, message_bytes_ram,
+ message_bytes_persistent, head_message_timestamp,
+ disk_reads, disk_writes, backing_queue_status,
+ messages_paged_out, message_bytes_paged_out]).
+
+%% We can't specify a per-queue ack/state with callback signatures
+-type ack() :: any().
+-type state() :: any().
+
+-type flow() :: 'flow' | 'noflow'.
+-type msg_ids() :: [rabbit_types:msg_id()].
+-type publish() :: {rabbit_types:basic_message(),
+ rabbit_types:message_properties(), boolean()}.
+-type delivered_publish() :: {rabbit_types:basic_message(),
+ rabbit_types:message_properties()}.
+-type fetch_result(Ack) ::
+ ('empty' | {rabbit_types:basic_message(), boolean(), Ack}).
+-type drop_result(Ack) ::
+ ('empty' | {rabbit_types:msg_id(), Ack}).
+-type recovery_terms() :: [term()] | 'non_clean_shutdown'.
+-type recovery_info() :: 'new' | recovery_terms().
+-type purged_msg_count() :: non_neg_integer().
+-type async_callback() ::
+ fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok').
+-type duration() :: ('undefined' | 'infinity' | number()).
+
+-type msg_fun(A) :: fun ((rabbit_types:basic_message(), ack(), A) -> A).
+-type msg_pred() :: fun ((rabbit_types:message_properties()) -> boolean()).
+
+-type queue_mode() :: atom().
+
+%% Called on startup with a vhost and a list of durable queue names on this vhost.
+%% The queues aren't being started at this point, but this call allows the
+%% backing queue to perform any checking necessary for the consistency
+%% of those queues, or initialise any other shared resources.
+%%
+%% The list of queue recovery terms returned as {ok, Terms} must be given
+%% in the same order as the list of queue names supplied.
+-callback start(rabbit_types:vhost(), [rabbit_amqqueue:name()]) -> rabbit_types:ok(recovery_terms()).
+
+%% Called to tear down any state/resources for vhost. NB: Implementations should
+%% not depend on this function being called on shutdown and instead
+%% should hook into the rabbit supervision hierarchy.
+-callback stop(rabbit_types:vhost()) -> 'ok'.
+
+%% Initialise the backing queue and its state.
+%%
+%% Takes
+%% 1. the amqqueue record
+%% 2. a term indicating whether the queue is an existing queue that
+%% should be recovered or not. When 'new' is given, no recovery is
+%% taking place, otherwise a list of recovery terms is given, or
+%% the atom 'non_clean_shutdown' if no recovery terms are available.
+%% 3. an asynchronous callback which accepts a function of type
+%% backing-queue-state to backing-queue-state. This callback
+%% function can be safely invoked from any process, which makes it
+%% useful for passing messages back into the backing queue,
+%% especially as the backing queue does not have control of its own
+%% mailbox.
+-callback init(amqqueue:amqqueue(), recovery_info(),
+ async_callback()) -> state().
+
+%% Called on queue shutdown when queue isn't being deleted.
+-callback terminate(any(), state()) -> state().
+
+%% Called when the queue is terminating and needs to delete all its
+%% content.
+-callback delete_and_terminate(any(), state()) -> state().
+
+%% Called to clean up after a crashed queue. In this case we don't
+%% have a process and thus a state(), we are just removing on-disk data.
+-callback delete_crashed(amqqueue:amqqueue()) -> 'ok'.
+
+%% Remove all 'fetchable' messages from the queue, i.e. all messages
+%% except those that have been fetched already and are pending acks.
+-callback purge(state()) -> {purged_msg_count(), state()}.
+
+%% Remove all messages in the queue which have been fetched and are
+%% pending acks.
+-callback purge_acks(state()) -> state().
+
+%% Publish a message.
+-callback publish(rabbit_types:basic_message(),
+ rabbit_types:message_properties(), boolean(), pid(), flow(),
+ state()) -> state().
+
+%% Like publish/6 but for batches of publishes.
+-callback batch_publish([publish()], pid(), flow(), state()) -> state().
+
+%% Called for messages which have already been passed straight
+%% out to a client. The queue will be empty for these calls
+%% (i.e. saves the round trip through the backing queue).
+-callback publish_delivered(rabbit_types:basic_message(),
+ rabbit_types:message_properties(), pid(), flow(),
+ state())
+ -> {ack(), state()}.
+
+%% Like publish_delivered/5 but for batches of publishes.
+-callback batch_publish_delivered([delivered_publish()], pid(), flow(),
+ state())
+ -> {[ack()], state()}.
+
+%% Called to inform the BQ about messages which have reached the
+%% queue, but are not going to be further passed to BQ.
+-callback discard(rabbit_types:msg_id(), pid(), flow(), state()) -> state().
+
+%% Return ids of messages which have been confirmed since the last
+%% invocation of this function (or initialisation).
+%%
+%% Message ids should only appear in the result of drain_confirmed
+%% under the following circumstances:
+%%
+%% 1. The message appears in a call to publish_delivered/4 and the
+%% first argument (ack_required) is false; or
+%% 2. The message is fetched from the queue with fetch/2 and the first
+%% argument (ack_required) is false; or
+%% 3. The message is acked (ack/2 is called for the message); or
+%% 4. The message is fully fsync'd to disk in such a way that the
+%% recovery of the message is guaranteed in the event of a crash of
+%% this rabbit node (excluding hardware failure).
+%%
+%% In addition to the above conditions, a message id may only appear
+%% in the result of drain_confirmed if
+%% #message_properties.needs_confirming = true when the msg was
+%% published (through whichever means) to the backing queue.
+%%
+%% It is legal for the same message id to appear in the results of
+%% multiple calls to drain_confirmed, which means that the backing
+%% queue is not required to keep track of which messages it has
+%% already confirmed. The confirm will be issued to the publisher the
+%% first time the message id appears in the result of
+%% drain_confirmed. All subsequent appearances of that message id will
+%% be ignored.
+-callback drain_confirmed(state()) -> {msg_ids(), state()}.
+
+%% Drop messages from the head of the queue while the supplied
+%% predicate on message properties returns true. Returns the first
+%% message properties for which the predicate returned false, or
+%% 'undefined' if the whole backing queue was traversed w/o the
+%% predicate ever returning false.
+-callback dropwhile(msg_pred(), state())
+ -> {rabbit_types:message_properties() | undefined, state()}.
+
+%% Like dropwhile, except messages are fetched in "require
+%% acknowledgement" mode and are passed, together with their ack tag,
+%% to the supplied function. The function is also fed an
+%% accumulator. The result of fetchwhile is as for dropwhile plus the
+%% accumulator.
+-callback fetchwhile(msg_pred(), msg_fun(A), A, state())
+ -> {rabbit_types:message_properties() | undefined,
+ A, state()}.
+
+%% Produce the next message.
+-callback fetch(true, state()) -> {fetch_result(ack()), state()};
+ (false, state()) -> {fetch_result(undefined), state()}.
+
+%% Remove the next message.
+-callback drop(true, state()) -> {drop_result(ack()), state()};
+ (false, state()) -> {drop_result(undefined), state()}.
+
+%% Acktags supplied are for messages which can now be forgotten
+%% about. Must return 1 msg_id per Ack, in the same order as Acks.
+-callback ack([ack()], state()) -> {msg_ids(), state()}.
+
+%% Reinsert messages into the queue which have already been delivered
+%% and were pending acknowledgement.
+-callback requeue([ack()], state()) -> {msg_ids(), state()}.
+
+%% Fold over messages by ack tag. The supplied function is called with
+%% each message, its ack tag, and an accumulator.
+-callback ackfold(msg_fun(A), A, state(), [ack()]) -> {A, state()}.
+
+%% Fold over all the messages in a queue and return the accumulated
+%% results, leaving the queue undisturbed.
+-callback fold(fun((rabbit_types:basic_message(),
+ rabbit_types:message_properties(),
+ boolean(), A) -> {('stop' | 'cont'), A}),
+ A, state()) -> {A, state()}.
+
+%% How long is my queue?
+-callback len(state()) -> non_neg_integer().
+
+%% Is my queue empty?
+-callback is_empty(state()) -> boolean().
+
+%% What's the queue depth, where depth = length + number of pending acks
+-callback depth(state()) -> non_neg_integer().
+
+%% For the next three functions, the assumption is that you're
+%% monitoring something like the ingress and egress rates of the
+%% queue. The RAM duration is thus the length of time represented by
+%% the messages held in RAM given the current rates. If you want to
+%% ignore all of this stuff, then do so, and return 0 in
+%% ram_duration/1.
+
+%% The target is to have no more messages in RAM than indicated by the
+%% duration and the current queue rates.
+-callback set_ram_duration_target(duration(), state()) -> state().
+
+%% Optionally recalculate the duration internally (likely to be just
+%% update your internal rates), and report how many seconds the
+%% messages in RAM represent given the current rates of the queue.
+-callback ram_duration(state()) -> {duration(), state()}.
+
+%% Should 'timeout' be called as soon as the queue process can manage
+%% (either on an empty mailbox, or when a timer fires)?
+-callback needs_timeout(state()) -> 'false' | 'timed' | 'idle'.
+
+%% Called (eventually) after needs_timeout returns 'idle' or 'timed'.
+%% Note this may be called more than once for each 'idle' or 'timed'
+%% returned from needs_timeout
+-callback timeout(state()) -> state().
+
+%% Called immediately before the queue hibernates.
+-callback handle_pre_hibernate(state()) -> state().
+
+%% Called when more credit has become available for credit_flow.
+-callback resume(state()) -> state().
+
+%% Used to help prioritisation in rabbit_amqqueue_process. The rate of
+%% inbound messages and outbound messages at the moment.
+-callback msg_rates(state()) -> {float(), float()}.
+
+-callback info(atom(), state()) -> any().
+
+%% Passed a function to be invoked with the relevant backing queue's
+%% state. Useful for when the backing queue or other components need
+%% to pass functions into the backing queue.
+-callback invoke(atom(), fun ((atom(), A) -> A), state()) -> state().
+
+%% Called prior to a publish or publish_delivered call. Allows the BQ
+%% to signal that it's already seen this message, (e.g. it was published
+%% or discarded previously) specifying whether to drop the message or reject it.
+-callback is_duplicate(rabbit_types:basic_message(), state())
+ -> {{true, drop} | {true, reject} | boolean(), state()}.
+
+-callback set_queue_mode(queue_mode(), state()) -> state().
+
+-callback zip_msgs_and_acks([delivered_publish()],
+ [ack()], Acc, state())
+ -> Acc.
+
+%% Called when rabbit_amqqueue_process receives a message via
+%% handle_info and it should be processed by the backing
+%% queue
+-callback handle_info(term(), state()) -> state().
+
+-spec info_keys() -> rabbit_types:info_keys().
+
+%% The statistics keys (?INFO_KEYS) any backing queue implementation
+%% can be asked about via the info/2 callback.
+info_keys() -> ?INFO_KEYS.
diff --git a/deps/rabbit/src/rabbit_basic.erl b/deps/rabbit/src/rabbit_basic.erl
new file mode 100644
index 0000000000..cdc9e082e4
--- /dev/null
+++ b/deps/rabbit/src/rabbit_basic.erl
@@ -0,0 +1,354 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_basic).
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+
+-export([publish/4, publish/5, publish/1,
+ message/3, message/4, properties/1, prepend_table_header/3,
+ extract_headers/1, extract_timestamp/1, map_headers/2, delivery/4,
+ header_routes/1, parse_expiration/1, header/2, header/3]).
+-export([build_content/2, from_content/1, msg_size/1,
+ maybe_gc_large_msg/1, maybe_gc_large_msg/2]).
+-export([add_header/4,
+ peek_fmt_message/1]).
+
+%%----------------------------------------------------------------------------
+
+-type properties_input() ::
+ rabbit_framing:amqp_property_record() | [{atom(), any()}].
+-type publish_result() ::
+ ok | rabbit_types:error('not_found').
+-type header() :: any().
+-type headers() :: rabbit_framing:amqp_table() | 'undefined'.
+
+-type exchange_input() :: rabbit_types:exchange() | rabbit_exchange:name().
+-type body_input() :: binary() | [binary()].
+
+%%----------------------------------------------------------------------------
+
+%% Convenience function, for avoiding round-trips in calls across the
+%% erlang distributed network.
+
+-spec publish
+ (exchange_input(), rabbit_router:routing_key(), properties_input(),
+ body_input()) ->
+ publish_result().
+
+%% Non-mandatory publish; delegates to publish/5 with Mandatory = false.
+publish(Exchange, RoutingKeyBin, Properties, Body) ->
+ publish(Exchange, RoutingKeyBin, false, Properties, Body).
+
+%% Convenience function, for avoiding round-trips in calls across the
+%% erlang distributed network.
+
+-spec publish
+ (exchange_input(), rabbit_router:routing_key(), boolean(),
+ properties_input(), body_input()) ->
+ publish_result().
+
+%% With an #exchange{} record in hand we can route immediately; given
+%% only an exchange name we go through publish/1, which looks it up.
+publish(X = #exchange{name = XName}, RKey, Mandatory, Props, Body) ->
+ Message = message(XName, RKey, properties(Props), Body),
+ publish(X, delivery(Mandatory, false, Message, undefined));
+publish(XName, RKey, Mandatory, Props, Body) ->
+ Message = message(XName, RKey, properties(Props), Body),
+ publish(delivery(Mandatory, false, Message, undefined)).
+
+-spec publish(rabbit_types:delivery()) -> publish_result().
+
+%% Publish a ready-made delivery: look up the exchange it names, or
+%% return {error, not_found} if the exchange no longer exists.
+publish(Delivery = #delivery{
+ message = #basic_message{exchange_name = XName}}) ->
+ case rabbit_exchange:lookup(XName) of
+ {ok, X} -> publish(X, Delivery);
+ Err -> Err
+ end.
+
+%% Route the delivery through exchange X and hand it to every matching
+%% queue, stateless (no per-queue session state kept by the caller).
+publish(X, Delivery) ->
+ Qs = rabbit_amqqueue:lookup(rabbit_exchange:route(X, Delivery)),
+ _ = rabbit_queue_type:deliver(Qs, Delivery, stateless),
+ ok.
+
+-spec delivery
+ (boolean(), boolean(), rabbit_types:message(), undefined | integer()) ->
+ rabbit_types:delivery().
+
+%% Wrap Message in a #delivery{} with the calling process as sender and
+%% credit flow disabled (flow = noflow).
+delivery(Mandatory, Confirm, Message, MsgSeqNo) ->
+ #delivery{mandatory = Mandatory, confirm = Confirm, sender = self(),
+ message = Message, msg_seq_no = MsgSeqNo, flow = noflow}.
+
+-spec build_content
+ (rabbit_framing:amqp_property_record(), binary() | [binary()]) ->
+ rabbit_types:content().
+
+%% Build an unencoded #content{} from a property record and a body given
+%% either as one binary or as a reversed list of payload fragments.
+build_content(Properties, BodyBin) when is_binary(BodyBin) ->
+ build_content(Properties, [BodyBin]);
+
+build_content(Properties, PFR) ->
+ %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1
+ {ClassId, _MethodId} =
+ rabbit_framing_amqp_0_9_1:method_id('basic.publish'),
+ #content{class_id = ClassId,
+ properties = Properties,
+ properties_bin = none,
+ protocol = none,
+ payload_fragments_rev = PFR}.
+
+-spec from_content
+ (rabbit_types:content()) ->
+ {rabbit_framing:amqp_property_record(), binary()}.
+
+%% Decode a #content{} back into its property record and a single body
+%% binary; the match on ClassId asserts it is basic.publish content.
+from_content(Content) ->
+ #content{class_id = ClassId,
+ properties = Props,
+ payload_fragments_rev = FragmentsRev} =
+ rabbit_binary_parser:ensure_content_decoded(Content),
+ %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1
+ {ClassId, _MethodId} =
+ rabbit_framing_amqp_0_9_1:method_id('basic.publish'),
+ {Props, list_to_binary(lists:reverse(FragmentsRev))}.
+
+%% This breaks the spec rule forbidding message modification
+%% Remove header Key from already-decoded content, clearing the encoded
+%% form so the content is re-serialised without it.  No-op when the
+%% message has no headers or the key is absent.
+strip_header(#content{properties = #'P_basic'{headers = undefined}}
+ = DecodedContent, _Key) ->
+ DecodedContent;
+strip_header(#content{properties = Props = #'P_basic'{headers = Headers}}
+ = DecodedContent, Key) ->
+ case lists:keysearch(Key, 1, Headers) of
+ false -> DecodedContent;
+ {value, Found} -> Headers0 = lists:delete(Found, Headers),
+ rabbit_binary_generator:clear_encoded_content(
+ DecodedContent#content{
+ properties = Props#'P_basic'{
+ headers = Headers0}})
+ end.
+
+-spec message
+ (rabbit_exchange:name(), rabbit_router:routing_key(),
+ rabbit_types:decoded_content()) ->
+ rabbit_types:ok_or_error2(rabbit_types:message(), any()).
+
+%% Construct a #basic_message{} from decoded content.  CC routes from
+%% the headers are appended to the routing keys; header_routes/1 and
+%% is_message_persistent/1 may throw {error, _}, which is caught here
+%% and returned as the error value.
+message(XName, RoutingKey, #content{properties = Props} = DecodedContent) ->
+ try
+ {ok, #basic_message{
+ exchange_name = XName,
+ content = strip_header(DecodedContent, ?DELETED_HEADER),
+ id = rabbit_guid:gen(),
+ is_persistent = is_message_persistent(DecodedContent),
+ routing_keys = [RoutingKey |
+ header_routes(Props#'P_basic'.headers)]}}
+ catch
+ {error, _Reason} = Error -> Error
+ end.
+
+-spec message
+ (rabbit_exchange:name(), rabbit_router:routing_key(), properties_input(),
+ binary()) ->
+ rabbit_types:message().
+
+%% Build a message from raw properties and a body binary.  Unlike
+%% message/3 this asserts success: an invalid message crashes with a
+%% badmatch instead of returning {error, _}.
+message(XName, RoutingKey, RawProperties, Body) ->
+ Properties = properties(RawProperties),
+ Content = build_content(Properties, Body),
+ {ok, Msg} = message(XName, RoutingKey, Content),
+ Msg.
+
+-spec properties
+ (properties_input()) -> rabbit_framing:amqp_property_record().
+
+%% Coerce the input into a #'P_basic'{} record: pass records through,
+%% fold a property list into the record by field name.  An unknown key
+%% throws {unknown_basic_property, Key}.
+properties(P = #'P_basic'{}) ->
+ P;
+properties(P) when is_list(P) ->
+ %% Yes, this is O(length(P) * record_info(size, 'P_basic') / 2),
+ %% i.e. slow. Use the definition of 'P_basic' directly if
+ %% possible!
+ lists:foldl(fun ({Key, Value}, Acc) ->
+ case indexof(record_info(fields, 'P_basic'), Key) of
+ 0 -> throw({unknown_basic_property, Key});
+ N -> setelement(N + 1, Acc, Value)
+ end
+ end, #'P_basic'{}, P).
+
+-spec prepend_table_header
+ (binary(), rabbit_framing:amqp_table(), headers()) -> headers().
+
+%% Prepend {table, Info} to the array stored under header Name, creating
+%% the header (and the headers table) if needed.  If Name already holds a
+%% non-array value, the old value is preserved under the
+%% x-invalid-headers header instead of being overwritten silently.
+prepend_table_header(Name, Info, undefined) ->
+ prepend_table_header(Name, Info, []);
+prepend_table_header(Name, Info, Headers) ->
+ case rabbit_misc:table_lookup(Headers, Name) of
+ {array, Existing} ->
+ prepend_table(Name, Info, Existing, Headers);
+ undefined ->
+ prepend_table(Name, Info, [], Headers);
+ Other ->
+ Headers2 = prepend_table(Name, Info, [], Headers),
+ set_invalid_header(Name, Other, Headers2)
+ end.
+
+%% Store [{table, Info} | Prior] as the array value of header Name.
+prepend_table(Name, Info, Prior, Headers) ->
+ rabbit_misc:set_table_value(Headers, Name, array, [{table, Info} | Prior]).
+
+%% Record Value as an invalid occurrence of header Name inside the
+%% ?INVALID_HEADERS_KEY table, creating or extending it as needed.
+set_invalid_header(Name, {_, _}=Value, Headers) when is_list(Headers) ->
+ case rabbit_misc:table_lookup(Headers, ?INVALID_HEADERS_KEY) of
+ undefined ->
+ set_invalid([{Name, array, [Value]}], Headers);
+ {table, ExistingHdr} ->
+ update_invalid(Name, Value, ExistingHdr, Headers);
+ Other ->
+ %% somehow the x-invalid-headers header is corrupt
+ Invalid = [{?INVALID_HEADERS_KEY, array, [Other]}],
+ set_invalid_header(Name, Value, set_invalid(Invalid, Headers))
+ end.
+
+%% Replace the ?INVALID_HEADERS_KEY table wholesale with NewHdr.
+set_invalid(NewHdr, Headers) ->
+ rabbit_misc:set_table_value(Headers, ?INVALID_HEADERS_KEY, table, NewHdr).
+
+%% Prepend Value to the list of invalid values already recorded for Name
+%% and write the updated table back.
+update_invalid(Name, Value, ExistingHdr, Header) ->
+ Values = case rabbit_misc:table_lookup(ExistingHdr, Name) of
+ undefined -> [Value];
+ {array, Prior} -> [Value | Prior]
+ end,
+ NewHdr = rabbit_misc:set_table_value(ExistingHdr, Name, array, Values),
+ set_invalid(NewHdr, Header).
+
+-spec header(header(), headers()) -> 'undefined' | any().
+
+%% Look up Header in the headers table, defaulting to 'undefined'.
+header(_Header, undefined) ->
+ undefined;
+header(_Header, []) ->
+ undefined;
+header(Header, Headers) ->
+ header(Header, Headers, undefined).
+
+-spec header(header(), headers(), any()) -> 'undefined' | any().
+
+%% Note: on a hit this returns the whole matching tuple as found by
+%% lists:keysearch/3, not just the value part.
+header(Header, Headers, Default) ->
+ case lists:keysearch(Header, 1, Headers) of
+ false -> Default;
+ {value, Val} -> Val
+ end.
+
+-spec extract_headers(rabbit_types:content()) -> headers().
+
+%% Decode the content if necessary and return its headers table
+%% (or 'undefined' when the message has none).
+extract_headers(Content) ->
+ #content{properties = #'P_basic'{headers = Headers}} =
+ rabbit_binary_parser:ensure_content_decoded(Content),
+ Headers.
+
+%% Decode the content if necessary and return its timestamp property.
+extract_timestamp(Content) ->
+ #content{properties = #'P_basic'{timestamp = Timestamp}} =
+ rabbit_binary_parser:ensure_content_decoded(Content),
+ Timestamp.
+
+-spec map_headers
+ (fun((headers()) -> headers()), rabbit_types:content()) ->
+ rabbit_types:content().
+
+%% Apply F to the headers (which may be 'undefined') and clear the
+%% encoded content so the new headers take effect when re-serialised.
+map_headers(F, Content) ->
+ Content1 = rabbit_binary_parser:ensure_content_decoded(Content),
+ #content{properties = #'P_basic'{headers = Headers} = Props} = Content1,
+ Headers1 = F(Headers),
+ rabbit_binary_generator:clear_encoded_content(
+ Content1#content{properties = Props#'P_basic'{headers = Headers1}}).
+
+%% 1-based position of Element in list L, or 0 when absent.
+indexof(L, Element) -> indexof(L, Element, 1).
+
+indexof([], _Element, _N) -> 0;
+indexof([Element | _Rest], Element, N) -> N;
+indexof([_ | Rest], Element, N) -> indexof(Rest, Element, N + 1).
+
+%% AMQP delivery_mode 2 is persistent; 1 or unset is transient; any
+%% other value throws {error, {delivery_mode_unknown, Other}} (caught
+%% by message/3).
+is_message_persistent(#content{properties = #'P_basic'{
+ delivery_mode = Mode}}) ->
+ case Mode of
+ 1 -> false;
+ 2 -> true;
+ undefined -> false;
+ Other -> throw({error, {delivery_mode_unknown, Other}})
+ end.
+
+%% Extract CC routes from headers
+
+-spec header_routes(undefined | rabbit_framing:amqp_table()) -> [string()].
+
+%% Collect the extra routing keys from the ?ROUTING_HEADERS entries of
+%% the headers table; only longstr array members are taken.  A routing
+%% header of any other type throws {error, _} (caught by message/3).
+header_routes(undefined) ->
+ [];
+header_routes(HeadersTable) ->
+ lists:append(
+ [case rabbit_misc:table_lookup(HeadersTable, HeaderKey) of
+ {array, Routes} -> [Route || {longstr, Route} <- Routes];
+ undefined -> [];
+ {Type, _Val} -> throw({error, {unacceptable_type_in_header,
+ binary_to_list(HeaderKey), Type}})
+ end || HeaderKey <- ?ROUTING_HEADERS]).
+
+-spec parse_expiration
+ (rabbit_framing:amqp_property_record()) ->
+ rabbit_types:ok_or_error2('undefined' | non_neg_integer(), any()).
+
+%% Parse the per-message TTL from the 'expiration' property binary.
+%% The entire string must be an integer (no trailing characters) and
+%% must pass rabbit_misc:check_expiry/1.
+parse_expiration(#'P_basic'{expiration = undefined}) ->
+ {ok, undefined};
+parse_expiration(#'P_basic'{expiration = Expiration}) ->
+ case string:to_integer(binary_to_list(Expiration)) of
+ {error, no_integer} = E ->
+ E;
+ {N, ""} ->
+ case rabbit_misc:check_expiry(N) of
+ ok -> {ok, N};
+ E = {error, _} -> E
+ end;
+ {_, S} ->
+ {error, {leftover_string, S}}
+ end.
+
+%% Thin wrappers over rabbit_writer's large-message GC heuristics and
+%% size computation.  With an 'undefined' threshold no GC is attempted;
+%% only the message size is computed and returned.
+maybe_gc_large_msg(Content) ->
+ rabbit_writer:maybe_gc_large_msg(Content).
+
+maybe_gc_large_msg(Content, undefined) ->
+ rabbit_writer:msg_size(Content);
+maybe_gc_large_msg(Content, GCThreshold) ->
+ rabbit_writer:maybe_gc_large_msg(Content, GCThreshold).
+
+msg_size(Content) ->
+ rabbit_writer:msg_size(Content).
+
+%% Set header Name (with the given AMQP field Type) on the message,
+%% creating the headers table first when the message had none.
+add_header(Name, Type, Value, #basic_message{content = Content0} = Msg) ->
+ Content = rabbit_basic:map_headers(
+ fun(undefined) ->
+ rabbit_misc:set_table_value([], Name, Type, Value);
+ (Headers) ->
+ rabbit_misc:set_table_value(Headers, Name, Type, Value)
+ end, Content0),
+ Msg#basic_message{content = Content}.
+
+%% Produce a {Name, Value} summary of a message for display: payload
+%% truncated to 64 bytes, exchange and routing keys, then every defined
+%% 'P_basic' property.  Headers are flattened to "header.<name>" keys
+%% and properties that are 'undefined' are dropped.
+peek_fmt_message(#basic_message{exchange_name = Ex,
+ routing_keys = RKeys,
+ content =
+ #content{payload_fragments_rev = Payl0,
+ properties = Props}}) ->
+ Fields = [atom_to_binary(F, utf8) || F <- record_info(fields, 'P_basic')],
+ T = lists:zip(Fields, tl(tuple_to_list(Props))),
+ lists:foldl(
+ fun ({<<"headers">>, Hdrs}, Acc) ->
+ case Hdrs of
+ [] ->
+ Acc;
+ _ ->
+ Acc ++ [{header_key(H), V} || {H, _T, V} <- Hdrs]
+ end;
+ ({_, undefined}, Acc) ->
+ Acc;
+ (KV, Acc) ->
+ [KV | Acc]
+ end, [], [{<<"payload (max 64 bytes)">>,
+ %% restrict payload to 64 bytes
+ binary_prefix_64(iolist_to_binary(lists:reverse(Payl0)), 64)},
+ {<<"exchange">>, Ex#resource.name},
+ {<<"routing_keys">>, RKeys} | T]).
+
+%% Prefix a header name for display in peek_fmt_message/1.
+header_key(A) ->
+ <<"header.", A/binary>>.
+
+%% First Len bytes of Bin, or all of Bin when it is shorter.
+binary_prefix_64(Bin, Len) ->
+ binary:part(Bin, 0, min(byte_size(Bin), Len)).
diff --git a/deps/rabbit/src/rabbit_binding.erl b/deps/rabbit/src/rabbit_binding.erl
new file mode 100644
index 0000000000..6ef25c4e60
--- /dev/null
+++ b/deps/rabbit/src/rabbit_binding.erl
@@ -0,0 +1,691 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_binding).
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([recover/0, recover/2, exists/1, add/2, add/3, remove/1, remove/2, remove/3, remove/4]).
+-export([list/1, list_for_source/1, list_for_destination/1,
+ list_for_source_and_destination/2, list_explicit/0]).
+-export([new_deletions/0, combine_deletions/2, add_deletion/3,
+ process_deletions/2, binding_action/3]).
+-export([info_keys/0, info/1, info/2, info_all/1, info_all/2, info_all/4]).
+%% these must all be run inside a mnesia tx
+-export([has_for_source/1, remove_for_source/1,
+ remove_for_destination/2, remove_transient_for_destination/1,
+ remove_default_exchange_binding_rows_of/1]).
+
+-export([implicit_for_destination/1, reverse_binding/1]).
+-export([new/4]).
+
+-define(DEFAULT_EXCHANGE(VHostPath), #resource{virtual_host = VHostPath,
+ kind = exchange,
+ name = <<>>}).
+
+%%----------------------------------------------------------------------------
+
+-export_type([key/0, deletions/0]).
+
+-type key() :: binary().
+
+-type bind_errors() :: rabbit_types:error(
+ {'resources_missing',
+ [{'not_found', (rabbit_types:binding_source() |
+ rabbit_types:binding_destination())} |
+ {'absent', amqqueue:amqqueue()}]}).
+
+-type bind_ok_or_error() :: 'ok' | bind_errors() |
+ rabbit_types:error(
+ {'binding_invalid', string(), [any()]}).
+-type bind_res() :: bind_ok_or_error() | rabbit_misc:thunk(bind_ok_or_error()).
+-type inner_fun() ::
+ fun((rabbit_types:exchange(),
+ rabbit_types:exchange() | amqqueue:amqqueue()) ->
+ rabbit_types:ok_or_error(rabbit_types:amqp_error())).
+-type bindings() :: [rabbit_types:binding()].
+
+%% TODO this should really be opaque but that seems to confuse 17.1's
+%% dialyzer into objecting to everything that uses it.
+-type deletions() :: dict:dict().
+
+%%----------------------------------------------------------------------------
+
+-spec new(rabbit_types:exchange(),
+ key(),
+ rabbit_types:exchange() | amqqueue:amqqueue(),
+ rabbit_framing:amqp_table()) ->
+ rabbit_types:binding().
+
+%% Build a #binding{} record.  NB: the previous first clause matched on
+%% the pattern #{}, which in Erlang matches ANY map (an empty map
+%% pattern imposes no key constraints), so the is_map/1 clause below was
+%% unreachable and all map arguments were silently dropped.  Matching on
+%% the guard first fixes that; maps:to_list(#{}) =:= [] preserves the
+%% old behaviour for the empty map.
+new(Src, RoutingKey, Dst, Arguments) when is_map(Arguments) ->
+ new(Src, RoutingKey, Dst, maps:to_list(Arguments));
+new(Src, RoutingKey, Dst, Arguments) ->
+ #binding{source = Src, key = RoutingKey, destination = Dst, args = Arguments}.
+
+
+-define(INFO_KEYS, [source_name, source_kind,
+ destination_name, destination_kind,
+ routing_key, arguments,
+ vhost]).
+
+%% Global table recovery
+
+-spec recover([rabbit_exchange:name()], [rabbit_amqqueue:name()]) ->
+ 'ok'.
+
+%% Global table recovery: copy every durable route that has no
+%% semi-durable counterpart into rabbit_semi_durable_route.
+recover() ->
+ rabbit_misc:table_filter(
+ fun (Route) ->
+ mnesia:read({rabbit_semi_durable_route, Route}) =:= []
+ end,
+ fun (Route, true) ->
+ ok = mnesia:write(rabbit_semi_durable_route, Route, write);
+ (_Route, false) ->
+ ok
+ end, rabbit_durable_route).
+
+%% Virtual host-specific recovery
+%% Recover all semi-durable routes whose destination is one of the given
+%% exchange or queue names, fanning the per-route work out to the worker
+%% pool and waiting for completion via a gatherer.
+recover(XNames, QNames) ->
+ XNameSet = sets:from_list(XNames),
+ QNameSet = sets:from_list(QNames),
+ %% Pick the name set matching the destination's resource kind.
+ SelectSet = fun (#resource{kind = exchange}) -> XNameSet;
+ (#resource{kind = queue}) -> QNameSet
+ end,
+ {ok, Gatherer} = gatherer:start_link(),
+ [recover_semi_durable_route(Gatherer, R, SelectSet(Dst)) ||
+ R = #route{binding = #binding{destination = Dst}} <-
+ rabbit_misc:dirty_read_all(rabbit_semi_durable_route)],
+ %% 'empty' asserts that every forked job has finished.
+ empty = gatherer:out(Gatherer),
+ ok = gatherer:stop(Gatherer),
+ ok.
+
+%% Submit an asynchronous recovery job for route R when its destination
+%% is in ToRecover; the job is tracked with gatherer fork/finish so the
+%% caller can wait for all jobs to complete.
+recover_semi_durable_route(Gatherer, R = #route{binding = B}, ToRecover) ->
+ #binding{source = Src, destination = Dst} = B,
+ case sets:is_element(Dst, ToRecover) of
+ true -> {ok, X} = rabbit_exchange:lookup(Src),
+ ok = gatherer:fork(Gatherer),
+ ok = worker_pool:submit_async(
+ fun () ->
+ recover_semi_durable_route_txn(R, X),
+ gatherer:finish(Gatherer)
+ end);
+ false -> ok
+ end.
+
+%% In an mnesia transaction, re-create the transient route rows (only if
+%% the semi-durable row still exists), then run the exchange's
+%% add_binding callback: once inside the transaction and once after
+%% commit with the exchange serial (or 'transaction' when inside a
+%% larger enclosing tx).
+recover_semi_durable_route_txn(R = #route{binding = B}, X) ->
+ rabbit_misc:execute_mnesia_transaction(
+ fun () ->
+ case mnesia:read(rabbit_semi_durable_route, B, read) of
+ [] -> no_recover;
+ _ -> ok = sync_transient_route(R, fun mnesia:write/3),
+ rabbit_exchange:serial(X)
+ end
+ end,
+ fun (no_recover, _) -> ok;
+ (_Serial, true) -> x_callback(transaction, X, add_binding, B);
+ (Serial, false) -> x_callback(Serial, X, add_binding, B)
+ end).
+
+-spec exists(rabbit_types:binding()) -> boolean() | bind_errors().
+
+%% Default-exchange bindings are implicit (one per queue, keyed by the
+%% queue's name with no arguments), so existence reduces to a queue
+%% lookup; anything else is checked against the rabbit_route table.
+exists(#binding{source = ?DEFAULT_EXCHANGE(_),
+ destination = #resource{kind = queue, name = QName} = Queue,
+ key = QName,
+ args = []}) ->
+ case rabbit_amqqueue:lookup(Queue) of
+ {ok, _} -> true;
+ {error, not_found} -> false
+ end;
+exists(Binding) ->
+ binding_action(
+ Binding, fun (_Src, _Dst, B) ->
+ rabbit_misc:const(mnesia:read({rabbit_route, B}) /= [])
+ end, fun not_found_or_absent_errs/1).
+
+-spec add(rabbit_types:binding(), rabbit_types:username()) -> bind_res().
+
+%% Add a binding with no extra pre-binding check.
+add(Binding, ActingUser) -> add(Binding, fun (_Src, _Dst) -> ok end, ActingUser).
+
+-spec add(rabbit_types:binding(), inner_fun(), rabbit_types:username()) -> bind_res().
+
+%% Validate the binding against the source exchange, run InnerFun (e.g.
+%% the queue-exclusivity check), and create the route rows only when no
+%% identical route already exists.  Runs inside the mnesia transaction
+%% set up by binding_action/3; the returned fun is the post-commit work.
+add(Binding, InnerFun, ActingUser) ->
+ binding_action(
+ Binding,
+ fun (Src, Dst, B) ->
+ case rabbit_exchange:validate_binding(Src, B) of
+ ok ->
+ lock_resource(Src, read),
+ lock_resource(Dst, read),
+ %% this argument is used to check queue exclusivity;
+ %% in general, we want to fail on that in preference to
+ %% anything else
+ case InnerFun(Src, Dst) of
+ ok ->
+ case mnesia:read({rabbit_route, B}) of
+ [] -> add(Src, Dst, B, ActingUser);
+ [_] -> fun () -> ok end
+ end;
+ {error, _} = Err ->
+ rabbit_misc:const(Err)
+ end;
+ {error, _} = Err ->
+ rabbit_misc:const(Err)
+ end
+ end, fun not_found_or_absent_errs/1).
+
+%% Write the route rows (durability depending on source/destination)
+%% and fire the exchange's add_binding callback: in-transaction now, and
+%% post-commit (the returned thunk) with the serial, plus a
+%% binding_created event attributed to ActingUser.
+add(Src, Dst, B, ActingUser) ->
+ [SrcDurable, DstDurable] = [durable(E) || E <- [Src, Dst]],
+ ok = sync_route(#route{binding = B}, SrcDurable, DstDurable,
+ fun mnesia:write/3),
+ x_callback(transaction, Src, add_binding, B),
+ Serial = rabbit_exchange:serial(Src),
+ fun () ->
+ x_callback(Serial, Src, add_binding, B),
+ ok = rabbit_event:notify(
+ binding_created,
+ info(B) ++ [{user_who_performed_action, ActingUser}])
+ end.
+
+-spec remove(rabbit_types:binding()) -> bind_res().
+%% Remove with no pre-check, attributed to the internal user.
+remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end, ?INTERNAL_USER).
+
+-spec remove(rabbit_types:binding(), rabbit_types:username()) -> bind_res().
+%% Remove with no pre-check, attributed to ActingUser.
+remove(Binding, ActingUser) -> remove(Binding, fun (_Src, _Dst) -> ok end, ActingUser).
+
+
+-spec remove(rabbit_types:binding(), inner_fun(), rabbit_types:username()) -> bind_res().
+%% Remove the binding inside binding_action/3's transaction.  InnerFun
+%% (e.g. an exclusivity check) is only consulted when a transient route
+%% row exists; an orphaned durable row is always cleaned up.
+remove(Binding, InnerFun, ActingUser) ->
+ binding_action(
+ Binding,
+ fun (Src, Dst, B) ->
+ lock_resource(Src, read),
+ lock_resource(Dst, read),
+ case mnesia:read(rabbit_route, B, write) of
+ [] -> case mnesia:read(rabbit_durable_route, B, write) of
+ [] -> rabbit_misc:const(ok);
+ %% We still delete the binding and run
+ %% all post-delete functions if there is only
+ %% a durable route in the database
+ _ -> remove(Src, Dst, B, ActingUser)
+ end;
+ _ -> case InnerFun(Src, Dst) of
+ ok -> remove(Src, Dst, B, ActingUser);
+ {error, _} = Err -> rabbit_misc:const(Err)
+ end
+ end
+ end, fun absent_errs_only/1).
+
+%% Delete the route rows, fold the removal into a deletions structure
+%% (possibly auto-deleting the source exchange) and run the post-delete
+%% callbacks and events.
+remove(Src, Dst, B, ActingUser) ->
+ ok = sync_route(#route{binding = B}, durable(Src), durable(Dst),
+ fun delete/3),
+ Deletions = maybe_auto_delete(
+ B#binding.source, [B], new_deletions(), false),
+ process_deletions(Deletions, ActingUser).
+
+%% Implicit bindings are implicit as of rabbitmq/rabbitmq-server#1721.
+%% Dirty-delete any explicit default-exchange binding rows for Dst left
+%% over by a pre-3.8 database; such bindings are implicit nowadays.
+remove_default_exchange_binding_rows_of(Dst = #resource{}) ->
+ case implicit_for_destination(Dst) of
+ [Binding] ->
+ mnesia:dirty_delete(rabbit_durable_route, Binding),
+ mnesia:dirty_delete(rabbit_semi_durable_route, Binding),
+ mnesia:dirty_delete(rabbit_reverse_route,
+ reverse_binding(Binding)),
+ mnesia:dirty_delete(rabbit_route, Binding);
+ _ ->
+ %% no binding to remove or
+ %% a competing tx has beaten us to it?
+ ok
+ end,
+ ok.
+
+-spec list_explicit() -> bindings().
+
+%% All explicitly-created bindings across all vhosts, excluding any
+%% default-exchange rows a pre-3.8 database may have left behind.
+list_explicit() ->
+ mnesia:async_dirty(
+ fun () ->
+ AllRoutes = mnesia:dirty_match_object(rabbit_route, #route{_ = '_'}),
+ %% if there are any default exchange bindings left after an upgrade
+ %% of a pre-3.8 database, filter them out
+ AllBindings = [B || #route{binding = B} <- AllRoutes],
+ lists:filter(fun(#binding{source = S}) ->
+ not (S#resource.kind =:= exchange andalso S#resource.name =:= <<>>)
+ end, AllBindings)
+ end).
+
-spec list(rabbit_types:vhost()) -> bindings().

%% Lists all bindings in VHostPath: the implicit default-exchange
%% bindings plus every explicit binding, with any leftover pre-3.8
%% default-exchange rows filtered out.
list(VHostPath) ->
    VHostWildcard = rabbit_misc:r(VHostPath, '_'),
    Pattern = #route{binding = #binding{source = VHostWildcard,
                                        destination = VHostWildcard,
                                        _ = '_'},
                     _ = '_'},
    Routes = mnesia:dirty_match_object(rabbit_route, Pattern),
    %% if there are any default exchange bindings left after an upgrade
    %% of a pre-3.8 database, filter them out
    Explicit = [B || #route{binding = #binding{source = S} = B} <- Routes,
                     S =/= ?DEFAULT_EXCHANGE(VHostPath)],
    implicit_bindings(VHostPath) ++ Explicit.
+
-spec list_for_source
        (rabbit_types:binding_source()) -> bindings().

%% Lists all bindings originating at the given source. The default
%% exchange has no stored rows, so its bindings are synthesised.
list_for_source(?DEFAULT_EXCHANGE(VHostPath)) ->
    implicit_bindings(VHostPath);
list_for_source(SrcName) ->
    Pattern = #route{binding = #binding{source = SrcName, _ = '_'}},
    mnesia:async_dirty(
      fun() ->
              Routes = mnesia:match_object(rabbit_route, Pattern, read),
              [B || #route{binding = B} <- Routes]
      end).
+
-spec list_for_destination
        (rabbit_types:binding_destination()) -> bindings().

%% Lists all bindings terminating at DstName: explicit bindings are read
%% from the reverse route table (keyed on destination), leftover pre-3.8
%% default-exchange rows are filtered out, and the synthesised implicit
%% default-exchange binding (for queue destinations) is prepended.
list_for_destination(DstName = #resource{virtual_host = VHostPath}) ->
    AllBindings = mnesia:async_dirty(
                    fun() ->
                            Route = #route{binding = #binding{destination = DstName,
                                                              _ = '_'}},
                            [reverse_binding(B) ||
                                #reverse_route{reverse_binding = B} <-
                                    mnesia:match_object(rabbit_reverse_route,
                                                        reverse_route(Route), read)]
                    end),
    Filtered = lists:filter(fun(#binding{source = S}) ->
                                    S =/= ?DEFAULT_EXCHANGE(VHostPath)
                            end, AllBindings),
    implicit_for_destination(DstName) ++ Filtered.
+
%% Synthesises one implicit default-exchange binding per queue in the
%% vhost; the routing key equals the queue name. These bindings are not
%% stored in mnesia.
implicit_bindings(VHostPath) ->
    DefaultX = ?DEFAULT_EXCHANGE(VHostPath),
    lists:map(
      fun (DstQueue = #resource{name = QName}) ->
              #binding{source = DefaultX,
                       destination = DstQueue,
                       key = QName,
                       args = []}
      end,
      rabbit_amqqueue:list_names(VHostPath)).
+
%% Returns the single synthesised default-exchange binding for a queue
%% destination, or [] for anything that is not a queue.
implicit_for_destination(DstQueue = #resource{kind = queue,
                                              virtual_host = VHostPath,
                                              name = QName}) ->
    [#binding{source = ?DEFAULT_EXCHANGE(VHostPath),
              destination = DstQueue,
              key = QName,
              args = []}];
implicit_for_destination(_) ->
    [].
+
-spec list_for_source_and_destination
        (rabbit_types:binding_source(), rabbit_types:binding_destination()) ->
            bindings().

%% Lists bindings between a specific source and destination. The default
%% exchange / queue pair is synthesised (it has no stored rows); all
%% other pairs are matched against rabbit_route.
list_for_source_and_destination(?DEFAULT_EXCHANGE(VHostPath),
                                #resource{kind = queue,
                                          virtual_host = VHostPath,
                                          name = QName} = DstQueue) ->
    [#binding{source = ?DEFAULT_EXCHANGE(VHostPath),
              destination = DstQueue,
              key = QName,
              args = []}];
list_for_source_and_destination(SrcName, DstName) ->
    mnesia:async_dirty(
      fun() ->
              Route = #route{binding = #binding{source = SrcName,
                                                destination = DstName,
                                                _ = '_'}},
              [B || #route{binding = B} <- mnesia:match_object(rabbit_route,
                                                               Route, read)]
      end).
+
-spec info_keys() -> rabbit_types:info_keys().

%% Returns the full set of binding info item names (see i/2).
info_keys() -> ?INFO_KEYS.

%% Applies F to every binding in VHostPath and collects the results.
map(VHostPath, F) ->
    %% TODO: there is scope for optimisation here, e.g. using a
    %% cursor, parallelising the function invocation
    lists:map(F, list(VHostPath)).
+
%% Builds the {Item, Value} proplist for binding B restricted to Items.
infos(Items, B) -> [{Item, i(Item, B)} || Item <- Items].

%% Extracts a single info item from a binding; throws {bad_argument,
%% Item} for unknown item names.
i(source_name, #binding{source = SrcName}) -> SrcName#resource.name;
i(source_kind, #binding{source = SrcName}) -> SrcName#resource.kind;
i(vhost, #binding{source = SrcName}) -> SrcName#resource.virtual_host;
i(destination_name, #binding{destination = DstName}) -> DstName#resource.name;
i(destination_kind, #binding{destination = DstName}) -> DstName#resource.kind;
i(routing_key, #binding{key = RoutingKey}) -> RoutingKey;
i(arguments, #binding{args = Arguments}) -> Arguments;
i(Item, _) -> throw({bad_argument, Item}).
+
-spec info(rabbit_types:binding()) -> rabbit_types:infos().

%% Returns all info items for binding B.
info(B = #binding{}) -> infos(?INFO_KEYS, B).

-spec info(rabbit_types:binding(), rabbit_types:info_keys()) ->
          rabbit_types:infos().

%% Returns only the requested info Items for binding B.
info(B = #binding{}, Items) -> infos(Items, B).

-spec info_all(rabbit_types:vhost()) -> [rabbit_types:infos()].

%% Returns all info items for every binding in VHostPath.
info_all(VHostPath) -> map(VHostPath, fun (B) -> info(B) end).

-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys()) ->
          [rabbit_types:infos()].

%% Returns the requested Items for every binding in VHostPath.
info_all(VHostPath, Items) -> map(VHostPath, fun (B) -> info(B, Items) end).

-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys(),
               reference(), pid()) -> 'ok'.

%% Streams the requested Items for every binding in VHostPath to
%% AggregatorPid via rabbit_control_misc, correlated by Ref.
info_all(VHostPath, Items, Ref, AggregatorPid) ->
    rabbit_control_misc:emitting_map(
      AggregatorPid, Ref, fun(B) -> info(B, Items) end, list(VHostPath)).
+
-spec has_for_source(rabbit_types:binding_source()) -> boolean().

%% True if at least one binding originates at SrcName.
has_for_source(SrcName) ->
    Match = #route{binding = #binding{source = SrcName, _ = '_'}},
    %% we need to check for semi-durable routes (which subsumes
    %% durable routes) here too in case a bunch of routes to durable
    %% queues have been removed temporarily as a result of a node
    %% failure
    contains(rabbit_route, Match) orelse
        contains(rabbit_semi_durable_route, Match).
+
-spec remove_for_source(rabbit_types:binding_source()) -> bindings().

%% Removes every route originating at SrcName from the transient and
%% semi-durable tables (usort dedups routes present in both) and
%% returns the removed bindings.
remove_for_source(SrcName) ->
    lock_resource(SrcName),
    Match = #route{binding = #binding{source = SrcName, _ = '_'}},
    remove_routes(
      lists:usort(
        mnesia:dirty_match_object(rabbit_route, Match) ++
            mnesia:dirty_match_object(rabbit_semi_durable_route, Match))).
+
-spec remove_for_destination
        (rabbit_types:binding_destination(), boolean()) -> deletions().

%% Removes all routes terminating at DstName; when OnlyDurable is true
%% only the durable/semi-durable tables are scanned (see
%% remove_for_destination/3).
remove_for_destination(DstName, OnlyDurable) ->
    remove_for_destination(DstName, OnlyDurable, fun remove_routes/1).

-spec remove_transient_for_destination
        (rabbit_types:binding_destination()) -> deletions().

%% Removes only the transient routes terminating at DstName.
remove_transient_for_destination(DstName) ->
    remove_for_destination(DstName, false, fun remove_transient_routes/1).
+
+%%----------------------------------------------------------------------------
+
%% Durability of a binding endpoint: the exchange's durable flag, or the
%% queue's durable flag for amqqueue records.
durable(#exchange{durable = D}) -> D;
durable(Q) when ?is_amqqueue(Q) ->
    amqqueue:is_durable(Q).
+
%% Resolves the binding's source and destination records inside a
%% transaction and invokes Fun(Src, Dst, Binding) with the binding's
%% arguments sorted into canonical field-table order; ErrFun handles
%% endpoints that cannot be read.
binding_action(Binding = #binding{source = SrcName,
                                  destination = DstName,
                                  args = Arguments}, Fun, ErrFun) ->
    call_with_source_and_destination(
      SrcName, DstName,
      fun (Src, Dst) ->
              SortedArgs = rabbit_misc:sort_field_table(Arguments),
              Fun(Src, Dst, Binding#binding{args = SortedArgs})
      end, ErrFun).
+
%% Applies Fun (an insert or delete, e.g. fun delete/3) to Route in
%% every table implied by source/destination durability, cascading from
%% most to least durable:
%%   durable Src and Dst -> rabbit_durable_route, then
%%   durable Dst only    -> rabbit_semi_durable_route, then
%%   always              -> rabbit_route + rabbit_reverse_route.
sync_route(Route, true, true, Fun) ->
    ok = Fun(rabbit_durable_route, Route, write),
    sync_route(Route, false, true, Fun);

sync_route(Route, false, true, Fun) ->
    ok = Fun(rabbit_semi_durable_route, Route, write),
    sync_route(Route, false, false, Fun);

sync_route(Route, _SrcDurable, false, Fun) ->
    sync_transient_route(Route, Fun).

%% Applies Fun to the transient route row and its reverse-route twin.
sync_transient_route(Route, Fun) ->
    ok = Fun(rabbit_route, Route, write),
    ok = Fun(rabbit_reverse_route, reverse_route(Route), write).
+
%% Runs Fun(Src, Dst) in an mnesia transaction-with-tail after reading
%% both endpoint records; any endpoint that cannot be read is passed to
%% ErrFun as a list of missing resource names.
call_with_source_and_destination(SrcName, DstName, Fun, ErrFun) ->
    SrcTable = table_for_resource(SrcName),
    DstTable = table_for_resource(DstName),
    rabbit_misc:execute_mnesia_tx_with_tail(
      fun () ->
              case {mnesia:read({SrcTable, SrcName}),
                    mnesia:read({DstTable, DstName})} of
                  {[Src], [Dst]} -> Fun(Src, Dst);
                  {[],    [_]  } -> ErrFun([SrcName]);
                  {[_],   []   } -> ErrFun([DstName]);
                  {[],    []   } -> ErrFun([SrcName, DstName])
              end
      end).
+
%% Builds an {error, {resources_missing, ...}} thunk covering every
%% missing resource name.
not_found_or_absent_errs(Names) ->
    Errs = [not_found_or_absent(Name) || Name <- Names],
    rabbit_misc:const({error, {resources_missing, Errs}}).

%% As above, but keeps only 'absent' classifications: used on removal,
%% where a plain not_found endpoint is acceptable.
absent_errs_only(Names) ->
    Errs = [E || Name <- Names,
                 {absent, _Q, _Reason} = E <- [not_found_or_absent(Name)]],
    rabbit_misc:const(case Errs of
                          [] -> ok;
                          _  -> {error, {resources_missing, Errs}}
                      end).
+
%% Maps a resource to the mnesia table holding its definition record.
table_for_resource(#resource{kind = exchange}) -> rabbit_exchange;
table_for_resource(#resource{kind = queue})    -> rabbit_queue.

%% Classifies a missing resource: exchanges are always {not_found, _};
%% queues may instead be {absent, Q, Reason}, as reported by
%% rabbit_amqqueue:not_found_or_absent/1.
not_found_or_absent(#resource{kind = exchange} = Name) ->
    {not_found, Name};
not_found_or_absent(#resource{kind = queue} = Name) ->
    case rabbit_amqqueue:not_found_or_absent(Name) of
        not_found                 -> {not_found, Name};
        {absent, _Q, _Reason} = R -> R
    end.
+
%% True if Table holds at least one record matching MatchHead.
contains(Table, MatchHead) ->
    continue(mnesia:select(Table, [{MatchHead, [], ['$_']}], 1, read)).

%% Walks an mnesia select continuation until a match is found (true) or
%% the table is exhausted (false).
continue('$end_of_table')    -> false;
continue({[_|_], _})         -> true;
continue({[], Continuation}) -> continue(mnesia:select(Continuation)).
+
%% Deletes each route in Routes from exactly the set of tables it lives
%% in, and returns the removed bindings.
remove_routes(Routes) ->
    %% This partitioning allows us to suppress unnecessary delete
    %% operations on disk tables, which require an fsync.
    {RamRoutes, DiskRoutes} =
        lists:partition(fun (R) -> mnesia:read(
                                     rabbit_durable_route, R#route.binding, read) == [] end,
                        Routes),
    {RamOnlyRoutes, SemiDurableRoutes} =
        lists:partition(fun (R) -> mnesia:read(
                                     rabbit_semi_durable_route, R#route.binding, read) == [] end,
                        RamRoutes),
    %% Of course the destination might not really be durable but it's
    %% just as easy to try to delete it from the semi-durable table
    %% than check first
    [ok = sync_route(R, true, true, fun delete/3) ||
        R <- DiskRoutes],
    [ok = sync_route(R, false, true, fun delete/3) ||
        R <- SemiDurableRoutes],
    [ok = sync_route(R, false, false, fun delete/3) ||
        R <- RamOnlyRoutes],
    [R#route.binding || R <- Routes].
+
+
%% Deletes a (possibly reversed) route row from Tab; shaped so it can be
%% passed as the Fun argument of sync_route/4.
delete(Tab, #route{binding = B}, LockKind) ->
    mnesia:delete(Tab, B, LockKind);
delete(Tab, #reverse_route{reverse_binding = B}, LockKind) ->
    mnesia:delete(Tab, B, LockKind).

%% Deletes Routes from the transient tables only and returns their
%% bindings.
remove_transient_routes(Routes) ->
    [begin
         ok = sync_transient_route(R, fun delete/3),
         R#route.binding
     end || R <- Routes].
+
%% Shared implementation of remove_for_destination/2 and
%% remove_transient_for_destination/1: locks the destination, collects
%% its routes (via the reverse table, or the durable/semi-durable
%% tables when OnlyDurable), removes them with Fun, then folds the
%% removed bindings — grouped and sorted by source exchange — into
%% auto-delete processing.
remove_for_destination(DstName, OnlyDurable, Fun) ->
    lock_resource(DstName),
    MatchFwd = #route{binding = #binding{destination = DstName, _ = '_'}},
    MatchRev = reverse_route(MatchFwd),
    Routes = case OnlyDurable of
                 false ->
                     [reverse_route(R) ||
                         R <- mnesia:dirty_match_object(
                                rabbit_reverse_route, MatchRev)];
                 true  -> lists:usort(
                            mnesia:dirty_match_object(
                              rabbit_durable_route, MatchFwd) ++
                                mnesia:dirty_match_object(
                                  rabbit_semi_durable_route, MatchFwd))
             end,
    Bindings = Fun(Routes),
    group_bindings_fold(fun maybe_auto_delete/4, new_deletions(),
                        lists:keysort(#binding.source, Bindings), OnlyDurable).
+
%% Instead of locking entire table on remove operations we can lock the
%% affected resource only.
lock_resource(Name) -> lock_resource(Name, write).

%% Takes a global mnesia lock named after the resource, on the nodes
%% where rabbit_route is written.
lock_resource(Name, LockKind) ->
    mnesia:lock({global, Name, mnesia:table_info(rabbit_route, where_to_write)},
                LockKind).
+
%% Requires that its input binding list is sorted in exchange-name
%% order, so that the grouping of bindings (for passing to
%% group_bindings_and_auto_delete1) works properly.
group_bindings_fold(_Fun, Acc, [], _OnlyDurable) ->
    Acc;
group_bindings_fold(Fun, Acc, [B = #binding{source = SrcName} | Bs],
                    OnlyDurable) ->
    group_bindings_fold(Fun, SrcName, Acc, Bs, [B], OnlyDurable).

%% Accumulates consecutive bindings sharing SrcName, then applies Fun
%% once per source exchange with the whole group.
group_bindings_fold(
  Fun, SrcName, Acc, [B = #binding{source = SrcName} | Bs], Bindings,
  OnlyDurable) ->
    group_bindings_fold(Fun, SrcName, Acc, Bs, [B | Bindings], OnlyDurable);
group_bindings_fold(Fun, SrcName, Acc, Removed, Bindings, OnlyDurable) ->
    %% Either Removed is [], or its head has a non-matching SrcName.
    group_bindings_fold(Fun, Fun(SrcName, Bindings, Acc, OnlyDurable), Removed,
                        OnlyDurable).
+
%% Checks whether exchange XName should be auto-deleted now that
%% Bindings have been removed, and records the outcome as an entry in
%% the Deletions dict. When OnlyDurable is true the exchange is read
%% from the durable table instead of rabbit_exchange.
maybe_auto_delete(XName, Bindings, Deletions, OnlyDurable) ->
    {Entry, Deletions1} =
        case mnesia:read({case OnlyDurable of
                              true  -> rabbit_durable_exchange;
                              false -> rabbit_exchange
                          end, XName}) of
            []  -> {{undefined, not_deleted, Bindings}, Deletions};
            [X] -> case rabbit_exchange:maybe_auto_delete(X, OnlyDurable) of
                       not_deleted ->
                           {{X, not_deleted, Bindings}, Deletions};
                       {deleted, Deletions2} ->
                           {{X, deleted, Bindings},
                            combine_deletions(Deletions, Deletions2)}
                   end
        end,
    add_deletion(XName, Entry, Deletions1).
+
%% Converts a route to its reverse-route twin and back.
reverse_route(#route{binding = Binding}) ->
    #reverse_route{reverse_binding = reverse_binding(Binding)};

reverse_route(#reverse_route{reverse_binding = Binding}) ->
    #route{binding = reverse_binding(Binding)}.

%% Converts a binding record to a reverse_binding record and vice
%% versa; all fields are carried over unchanged. The reverse record is
%% keyed for destination-first lookups.
reverse_binding(#reverse_binding{source = SrcName,
                                 destination = DstName,
                                 key = Key,
                                 args = Args}) ->
    #binding{source = SrcName,
             destination = DstName,
             key = Key,
             args = Args};

reverse_binding(#binding{source = SrcName,
                         destination = DstName,
                         key = Key,
                         args = Args}) ->
    #reverse_binding{source = SrcName,
                     destination = DstName,
                     key = Key,
                     args = Args}.
+
+%% ----------------------------------------------------------------------------
+%% Binding / exchange deletion abstraction API
+%% ----------------------------------------------------------------------------
+
%% Of the two candidate values, returns the one that differs from
%% NotThis; when no candidate differs (or both candidates are equal),
%% returns the common value. Used by merge_entry/2 to merge
%% deletion-dict entries.
anything_but(NotThis, NotThis, Other) -> Other;
anything_but(NotThis, Other, NotThis) -> Other;
anything_but(_NotThis, Same, Same)    -> Same.
+
-spec new_deletions() -> deletions().

%% Creates an empty deletions accumulator (a dict keyed by exchange
%% name).
new_deletions() -> dict:new().

-spec add_deletion
        (rabbit_exchange:name(),
         {'undefined' | rabbit_types:exchange(),
          'deleted' | 'not_deleted',
          bindings()},
         deletions()) ->
            deletions().

%% Records Entry = {Exchange-or-undefined, deleted|not_deleted,
%% Bindings} for XName, merging with any existing entry.
add_deletion(XName, Entry, Deletions) ->
    dict:update(XName, fun (Entry1) -> merge_entry(Entry1, Entry) end,
                Entry, Deletions).

-spec combine_deletions(deletions(), deletions()) -> deletions().

%% Merges two deletion dicts entry-wise.
combine_deletions(Deletions1, Deletions2) ->
    dict:merge(fun (_XName, Entry1, Entry2) -> merge_entry(Entry1, Entry2) end,
               Deletions1, Deletions2).

%% Merges two entries for the same exchange: prefers a known exchange
%% record over undefined and 'deleted' over 'not_deleted', and nests the
%% binding lists (flattened later by process_deletions/2).
merge_entry({X1, Deleted1, Bindings1}, {X2, Deleted2, Bindings2}) ->
    {anything_but(undefined, X1, X2),
     anything_but(not_deleted, Deleted1, Deleted2),
     [Bindings1 | Bindings2]}.
+
-spec process_deletions(deletions(), rabbit_types:username()) -> rabbit_misc:thunk('ok').

%% Runs the in-transaction phase of the accumulated deletions (flattens
%% each entry's bindings and invokes the exchange callbacks with serial
%% 'transaction') and returns a thunk for the post-commit phase, which
%% emits exchange_deleted / binding_deleted events and re-invokes the
%% callbacks with the recorded serial.
process_deletions(Deletions, ActingUser) ->
    AugmentedDeletions =
        dict:map(fun (_XName, {X, deleted, Bindings}) ->
                         Bs = lists:flatten(Bindings),
                         x_callback(transaction, X, delete, Bs),
                         {X, deleted, Bs, none};
                     (_XName, {X, not_deleted, Bindings}) ->
                         Bs = lists:flatten(Bindings),
                         x_callback(transaction, X, remove_bindings, Bs),
                         {X, not_deleted, Bs, rabbit_exchange:serial(X)}
                 end, Deletions),
    fun() ->
            dict:fold(fun (XName, {X, deleted, Bs, Serial}, ok) ->
                              ok = rabbit_event:notify(
                                     exchange_deleted,
                                     [{name, XName},
                                      {user_who_performed_action, ActingUser}]),
                              del_notify(Bs, ActingUser),
                              x_callback(Serial, X, delete, Bs);
                          (_XName, {X, not_deleted, Bs, Serial}, ok) ->
                              del_notify(Bs, ActingUser),
                              x_callback(Serial, X, remove_bindings, Bs)
                      end, ok, AugmentedDeletions)
    end.
+
%% Emits a binding_deleted event for each removed binding.
del_notify(Bs, ActingUser) -> [rabbit_event:notify(
                                 binding_deleted,
                                 info(B) ++ [{user_who_performed_action, ActingUser}])
                               || B <- Bs].

%% Invokes the exchange-type callback F (delete | remove_bindings) for
%% X under the given serial.
x_callback(Serial, X, F, Bs) ->
    ok = rabbit_exchange:callback(X, F, Serial, [X, Bs]).
diff --git a/deps/rabbit/src/rabbit_boot_steps.erl b/deps/rabbit/src/rabbit_boot_steps.erl
new file mode 100644
index 0000000000..f87448edb7
--- /dev/null
+++ b/deps/rabbit/src/rabbit_boot_steps.erl
@@ -0,0 +1,91 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_boot_steps).
+
+-export([run_boot_steps/0, run_boot_steps/1, run_cleanup_steps/1]).
+-export([find_steps/0, find_steps/1]).
+
%% Runs the boot steps of all currently loaded applications.
run_boot_steps() ->
    run_boot_steps(loaded_applications()).

%% Runs the 'mfa' attribute of every boot step defined by Apps, in
%% dependency order; run_step/2 exits the process on the first failure.
run_boot_steps(Apps) ->
    [begin
         rabbit_log:info("Running boot step ~s defined by app ~s", [Step, App]),
         ok = run_step(Attrs, mfa)
     end || {App, Step, Attrs} <- find_steps(Apps)],
    ok.

%% Runs the 'cleanup' attribute of every boot step defined by Apps.
run_cleanup_steps(Apps) ->
    [run_step(Attrs, cleanup) || {_, _, Attrs} <- find_steps(Apps)],
    ok.
+
%% Names of all applications currently loaded in this node.
loaded_applications() ->
    lists:map(fun ({App, _Descr, _Vsn}) -> App end,
              application:loaded_applications()).
+
%% Finds the boot steps of all loaded applications.
find_steps() ->
    find_steps(loaded_applications()).

%% Returns the topologically sorted boot steps belonging to Apps, as
%% {App, StepName, Attributes} triples gathered from rabbit_boot_step
%% module attributes.
find_steps(Apps) ->
    All = sort_boot_steps(rabbit_misc:all_module_attributes(rabbit_boot_step)),
    [Step || {App, _, _} = Step <- All, lists:member(App, Apps)].
+
%% Applies every {AttributeName, {M,F,A}} entry in Attributes (i.e. the
%% step's mfa or cleanup actions). Each call must return ok; an
%% {error, Reason} result terminates the process via exit/1.
run_step(Attributes, AttributeName) ->
    [begin
         rabbit_log:debug("Applying MFA: M = ~s, F = ~s, A = ~p",
                          [M, F, A]),
         case apply(M,F,A) of
             ok              -> ok;
             {error, Reason} -> exit({error, Reason})
         end
     end
     || {Key, {M,F,A}} <- Attributes,
        Key =:= AttributeName],
    ok.
+
%% Graph vertices for one {App, Module, Steps} triple: each step becomes
%% {StepName, {AppName, StepName, Attributes}}.
vertices({AppName, _Module, Steps}) ->
    lists:map(fun ({StepName, Atts}) ->
                      {StepName, {AppName, StepName, Atts}}
              end, Steps).
+
%% Dependency edges for one {App, Module, Steps} triple: a 'requires'
%% attribute points step -> dependency, an 'enables' attribute points
%% the other way. The attribute value may be a single step name or a
%% list of them; all other attributes are ignored.
edges({_AppName, _Module, Steps}) ->
    AsList = fun (L) when is_list(L) -> L;
                 (T) -> [T]
             end,
    lists:append(
      [case Key of
           requires -> [{StepName, Dep} || Dep <- AsList(Targets)];
           enables  -> [{Dep, StepName} || Dep <- AsList(Targets)]
       end || {StepName, Atts} <- Steps,
              {Key, Targets} <- Atts,
              Key =:= requires orelse Key =:= enables]).
+
%% Topologically sorts boot steps by their requires/enables edges.
%% Exits on duplicate step names, malformed dependency edges (including
%% cycles, as reported by build_acyclic_graph), or steps whose {M,F,A}
%% is not an exported function.
sort_boot_steps(UnsortedSteps) ->
    case rabbit_misc:build_acyclic_graph(fun vertices/1, fun edges/1,
                                         UnsortedSteps) of
        {ok, G} ->
            %% Use topological sort to find a consistent ordering (if
            %% there is one, otherwise fail).
            SortedSteps = lists:reverse(
                            [begin
                                 {StepName, Step} = digraph:vertex(G,
                                                                   StepName),
                                 Step
                             end || StepName <- digraph_utils:topsort(G)]),
            digraph:delete(G),
            %% Check that all mentioned {M,F,A} triples are exported.
            case [{StepName, {M,F,A}} ||
                     {_App, StepName, Attributes} <- SortedSteps,
                     {mfa, {M,F,A}} <- Attributes,
                     code:ensure_loaded(M) =/= {module, M} orelse
                         not erlang:function_exported(M, F, length(A))] of
                []         -> SortedSteps;
                MissingFns -> exit({boot_functions_not_exported, MissingFns})
            end;
        {error, {vertex, duplicate, StepName}} ->
            exit({duplicate_boot_step, StepName});
        {error, {edge, Reason, From, To}} ->
            exit({invalid_boot_step_dependency, From, To, Reason})
    end.
diff --git a/deps/rabbit/src/rabbit_channel.erl b/deps/rabbit/src/rabbit_channel.erl
new file mode 100644
index 0000000000..8e7828a7c0
--- /dev/null
+++ b/deps/rabbit/src/rabbit_channel.erl
@@ -0,0 +1,2797 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_channel).
+
+%% Transitional step until we can require Erlang/OTP 21 and
+%% use the now recommended try/catch syntax for obtaining the stack trace.
+-compile(nowarn_deprecated_function).
+
+%% rabbit_channel processes represent an AMQP 0-9-1 channels.
+%%
+%% Connections parse protocol frames coming from clients and
+%% dispatch them to channel processes.
+%% Channels are responsible for implementing the logic behind
+%% the various protocol methods, involving other processes as
+%% needed:
+%%
+%% * Routing messages (using functions in various exchange type
+%% modules) to queue processes.
+%% * Managing queues, exchanges, and bindings.
+%% * Keeping track of consumers
+%% * Keeping track of unacknowledged deliveries to consumers
+%% * Keeping track of publisher confirms
+%% * Transaction management
+%% * Authorisation (enforcing permissions)
+%% * Publishing trace events if tracing is enabled
+%%
+%% Every channel has a number of dependent processes:
+%%
+%% * A writer which is responsible for sending frames to clients.
+%% * A limiter which controls how many messages can be delivered
+%% to consumers according to active QoS prefetch and internal
+%% flow control logic.
+%%
+%% Channels are also aware of their connection's queue collector.
+%% When a queue is declared as exclusive on a channel, the channel
+%% will notify queue collector of that queue.
+
+-include_lib("rabbit_common/include/rabbit_framing.hrl").
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include_lib("rabbit_common/include/rabbit_misc.hrl").
+
+-include("amqqueue.hrl").
+
+-behaviour(gen_server2).
+
+-export([start_link/11, start_link/12, do/2, do/3, do_flow/3, flush/1, shutdown/1]).
+-export([send_command/2, deliver/4, deliver_reply/2,
+ send_credit_reply/2, send_drained/2]).
+-export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1,
+ emit_info_all/4, info_local/1]).
+-export([refresh_config_local/0, ready_for_close/1]).
+-export([refresh_interceptors/0]).
+-export([force_event_refresh/1]).
+-export([update_user_state/2]).
+
+-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2,
+ handle_info/2, handle_pre_hibernate/1, handle_post_hibernate/1,
+ prioritise_call/4, prioritise_cast/3, prioritise_info/3,
+ format_message_queue/2]).
+
+%% Internal
+-export([list_local/0, emit_info_local/3, deliver_reply_local/3]).
+-export([get_vhost/1, get_user/1]).
+%% For testing
+-export([build_topic_variable_map/3]).
+-export([list_queue_states/1, get_max_message_size/0]).
+
+%% Mgmt HTTP API refactor
+-export([handle_method/6]).
+
+-record(conf, {
+ %% starting | running | flow | closing
+ state,
+ %% same as reader's protocol. Used when instantiating
+ %% (protocol) exceptions.
+ protocol,
+ %% channel number
+ channel,
+ %% reader process
+ reader_pid,
+ %% writer process
+ writer_pid,
+ %%
+ conn_pid,
+ %% same as reader's name, see #v1.name
+ %% in rabbit_reader
+ conn_name,
+ %% channel's originating source e.g. rabbit_reader | rabbit_direct | undefined
+ %% or any other channel creating/spawning entity
+ source,
+ %% same as #v1.user in the reader, used in
+ %% authorisation checks
+ user,
+ %% same as #v1.user in the reader
+ virtual_host,
+ %% when queue.bind's queue field is empty,
+ %% this name will be used instead
+ most_recently_declared_queue,
+ %% when a queue is declared as exclusive, queue
+ %% collector must be notified.
+ %% see rabbit_queue_collector for more info.
+ queue_collector_pid,
+
+ %% same as capabilities in the reader
+ capabilities,
+ %% tracing exchange resource if tracing is enabled,
+ %% 'none' otherwise
+ trace_state,
+ consumer_prefetch,
+ %% Message content size limit
+ max_message_size,
+ consumer_timeout,
+ authz_context,
+ %% defines how ofter gc will be executed
+ writer_gc_threshold
+ }).
+
+-record(pending_ack, {delivery_tag,
+ tag,
+ delivered_at,
+ queue, %% queue name
+ msg_id}).
+
+-record(ch, {cfg :: #conf{},
+ %% limiter state, see rabbit_limiter
+ limiter,
+ %% none | {Msgs, Acks} | committing | failed |
+ tx,
+ %% (consumer) delivery tag sequence
+ next_tag,
+ %% messages pending consumer acknowledgement
+ unacked_message_q,
+ %% queue processes are monitored to update
+ %% queue names
+ queue_monitors,
+ %% a map of consumer tags to
+ %% consumer details: #amqqueue record, acknowledgement mode,
+ %% consumer exclusivity, etc
+ consumer_mapping,
+ %% a map of queue names to consumer tag lists
+ queue_consumers,
+ %% timer used to emit statistics
+ stats_timer,
+ %% are publisher confirms enabled for this channel?
+ confirm_enabled,
+ %% publisher confirm delivery tag sequence
+ publish_seqno,
+ %% an unconfirmed_messages data structure used to track unconfirmed
+ %% (to publishers) messages
+ unconfirmed,
+ %% a list of tags for published messages that were
+ %% delivered but are yet to be confirmed to the client
+ confirmed,
+ %% a list of tags for published messages that were
+ %% rejected but are yet to be sent to the client
+ rejected,
+ %% used by "one shot RPC" (amq.
+ reply_consumer,
+ %% flow | noflow, see rabbitmq-server#114
+ delivery_flow,
+ interceptor_state,
+ queue_states,
+ tick_timer
+ }).
+
+-define(QUEUE, lqueue).
+
+-define(MAX_PERMISSION_CACHE_SIZE, 12).
+
+-define(REFRESH_TIMEOUT, 15000).
+
+-define(STATISTICS_KEYS,
+ [reductions,
+ pid,
+ transactional,
+ confirm,
+ consumer_count,
+ messages_unacknowledged,
+ messages_unconfirmed,
+ messages_uncommitted,
+ acks_uncommitted,
+ pending_raft_commands,
+ prefetch_count,
+ global_prefetch_count,
+ state,
+ garbage_collection]).
+
+
+-define(CREATION_EVENT_KEYS,
+ [pid,
+ name,
+ connection,
+ number,
+ user,
+ vhost,
+ user_who_performed_action]).
+
+-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]).
+
+-define(INCR_STATS(Type, Key, Inc, Measure, State),
+ case rabbit_event:stats_level(State, #ch.stats_timer) of
+ fine ->
+ rabbit_core_metrics:channel_stats(Type, Measure, {self(), Key}, Inc),
+ %% Keys in the process dictionary are used to clean up the core metrics
+ put({Type, Key}, none);
+ _ ->
+ ok
+ end).
+
+-define(INCR_STATS(Type, Key, Inc, Measure),
+ begin
+ rabbit_core_metrics:channel_stats(Type, Measure, {self(), Key}, Inc),
+ %% Keys in the process dictionary are used to clean up the core metrics
+ put({Type, Key}, none)
+ end).
+
+%%----------------------------------------------------------------------------
+
+-export_type([channel_number/0]).
+
+-type channel_number() :: non_neg_integer().
+
+-export_type([channel/0]).
+
+-type channel() :: #ch{}.
+
+%%----------------------------------------------------------------------------
+
-spec start_link
        (channel_number(), pid(), pid(), pid(), string(), rabbit_types:protocol(),
         rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(),
         pid(), pid()) ->
            rabbit_types:ok_pid_or_error().

%% Starts a channel process with AMQP params defaulted to 'undefined';
%% see start_link/12.
start_link(Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User,
           VHost, Capabilities, CollectorPid, Limiter) ->
    start_link(Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User,
               VHost, Capabilities, CollectorPid, Limiter, undefined).
+
-spec start_link
        (channel_number(), pid(), pid(), pid(), string(), rabbit_types:protocol(),
         rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(),
         pid(), pid(), any()) ->
            rabbit_types:ok_pid_or_error().

%% Starts a channel gen_server2 process; all arguments are handed to
%% init/1 as the init list.
start_link(Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User,
           VHost, Capabilities, CollectorPid, Limiter, AmqpParams) ->
    gen_server2:start_link(
      ?MODULE, [Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol,
                User, VHost, Capabilities, CollectorPid, Limiter, AmqpParams], []).
+
-spec do(pid(), rabbit_framing:amqp_method_record()) -> 'ok'.

%% Dispatches a content-less AMQP method to the channel.
do(Pid, Method) ->
    rabbit_channel_common:do(Pid, Method).

-spec do
        (pid(), rabbit_framing:amqp_method_record(),
         rabbit_types:maybe(rabbit_types:content())) ->
            'ok'.

%% Dispatches an AMQP method with optional content to the channel.
do(Pid, Method, Content) ->
    rabbit_channel_common:do(Pid, Method, Content).

-spec do_flow
        (pid(), rabbit_framing:amqp_method_record(),
         rabbit_types:maybe(rabbit_types:content())) ->
            'ok'.

%% Flow-controlled variant of do/3 (see
%% rabbit_channel_common:do_flow/3).
do_flow(Pid, Method, Content) ->
    rabbit_channel_common:do_flow(Pid, Method, Content).
+
-spec flush(pid()) -> 'ok'.

%% Synchronous no-op call: returns once the channel has processed its
%% mailbox up to this request.
flush(Pid) ->
    gen_server2:call(Pid, flush, infinity).

-spec shutdown(pid()) -> 'ok'.

%% Asks the channel to terminate, asynchronously.
shutdown(Pid) ->
    gen_server2:cast(Pid, terminate).

-spec send_command(pid(), rabbit_framing:amqp_method_record()) -> 'ok'.

%% Asynchronously queues an outbound command on the channel.
send_command(Pid, Msg) ->
    gen_server2:cast(Pid, {command, Msg}).
+
-spec deliver
        (pid(), rabbit_types:ctag(), boolean(), rabbit_amqqueue:qmsg()) -> 'ok'.

%% Asynchronously delivers Msg to the channel for consumer ConsumerTag.
deliver(Pid, ConsumerTag, AckRequired, Msg) ->
    gen_server2:cast(Pid, {deliver, ConsumerTag, AckRequired, Msg}).

-spec deliver_reply(binary(), rabbit_types:delivery()) -> 'ok'.

%% Routes a direct reply-to delivery to the channel pid encoded in the
%% pseudo-queue name; silently drops it when the name fails to decode.
deliver_reply(<<"amq.rabbitmq.reply-to.", Rest/binary>>, Delivery) ->
    case decode_fast_reply_to(Rest) of
        {ok, Pid, Key} ->
            delegate:invoke_no_result(
              Pid, {?MODULE, deliver_reply_local, [Key, Delivery]});
        error ->
            ok
    end.
+
%% We want to ensure people can't use this mechanism to send a message
%% to an arbitrary process and kill it!

-spec deliver_reply_local(pid(), binary(), rabbit_types:delivery()) -> 'ok'.

%% Delivers only if Pid is a registered channel (a member of the
%% rabbit_channels pg_local group); otherwise the delivery is dropped.
deliver_reply_local(Pid, Key, Delivery) ->
    case pg_local:in_group(rabbit_channels, Pid) of
        true  -> gen_server2:cast(Pid, {deliver_reply, Key, Delivery});
        false -> ok
    end.
+
%% Resolves a direct reply-to name: the bare pseudo-queue always
%% 'exists'; an encoded name is decoded and the owning channel is asked
%% whether Key is current (an exited channel yields not_found via the
%% exit handler); anything else is not_found.
declare_fast_reply_to(<<"amq.rabbitmq.reply-to">>) ->
    exists;
declare_fast_reply_to(<<"amq.rabbitmq.reply-to.", Rest/binary>>) ->
    case decode_fast_reply_to(Rest) of
        {ok, Pid, Key} ->
            Msg = {declare_fast_reply_to, Key},
            rabbit_misc:with_exit_handler(
              rabbit_misc:const(not_found),
              fun() -> gen_server2:call(Pid, Msg, infinity) end);
        error ->
            not_found
    end;
declare_fast_reply_to(_) ->
    not_found.
+
%% Decodes the tail of a direct reply-to name, expected to be
%% "<base64-encoded pid>.<key>"; returns {ok, Pid, Key} or error.
%% NOTE(review): binary_to_term/1 here runs on client-supplied data;
%% confirm upstream validation or consider the 'safe' decode option.
decode_fast_reply_to(Rest) ->
    case string:tokens(binary_to_list(Rest), ".") of
        [PidEnc, Key] ->
            Pid = binary_to_term(base64:decode(PidEnc)),
            {ok, Pid, Key};
        _Other ->
            error
    end.
+
-spec send_credit_reply(pid(), non_neg_integer()) -> 'ok'.

%% Sends the queue length back to the channel for a credit request.
send_credit_reply(Pid, Len) ->
    gen_server2:cast(Pid, {send_credit_reply, Len}).

-spec send_drained(pid(), [{rabbit_types:ctag(), non_neg_integer()}]) -> 'ok'.

%% Notifies the channel that the listed consumers have been drained.
send_drained(Pid, CTagCredit) ->
    gen_server2:cast(Pid, {send_drained, CTagCredit}).

-spec list() -> [pid()].

%% Lists channel processes on all running cluster nodes.
list() ->
    Nodes = rabbit_nodes:all_running(),
    rabbit_misc:append_rpc_all_nodes(Nodes, rabbit_channel, list_local, [], ?RPC_TIMEOUT).

-spec list_local() -> [pid()].

%% Lists channel processes on this node only.
list_local() ->
    pg_local:get_members(rabbit_channels).

-spec info_keys() -> rabbit_types:info_keys().

%% Returns the full set of channel info item names.
info_keys() -> ?INFO_KEYS.
+
-spec info(pid()) -> rabbit_types:infos().

%% Fetches all info items from a channel, bounded by the operation
%% timeout/deadline; throws the returned error reason, or 'timeout'
%% when the call times out.
info(Pid) ->
    {Timeout, Deadline} = get_operation_timeout_and_deadline(),
    try
        case gen_server2:call(Pid, {info, Deadline}, Timeout) of
            {ok, Res}      -> Res;
            {error, Error} -> throw(Error)
        end
    catch
        exit:{timeout, _} ->
            rabbit_log:error("Timed out getting channel ~p info", [Pid]),
            throw(timeout)
    end.

-spec info(pid(), rabbit_types:info_keys()) -> rabbit_types:infos().

%% As info/1 but restricted to the requested Items.
info(Pid, Items) ->
    {Timeout, Deadline} = get_operation_timeout_and_deadline(),
    try
        case gen_server2:call(Pid, {{info, Items}, Deadline}, Timeout) of
            {ok, Res}      -> Res;
            {error, Error} -> throw(Error)
        end
    catch
        exit:{timeout, _} ->
            rabbit_log:error("Timed out getting channel ~p info", [Pid]),
            throw(timeout)
    end.
+
-spec info_all() -> [rabbit_types:infos()].

%% Collects info from every channel cluster-wide, skipping channels
%% that exit during collection.
info_all() ->
    rabbit_misc:filter_exit_map(fun (C) -> info(C) end, list()).

-spec info_all(rabbit_types:info_keys()) -> [rabbit_types:infos()].

%% As info_all/0 but restricted to Items.
info_all(Items) ->
    rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list()).

%% As info_all/1 but for this node's channels only.
info_local(Items) ->
    rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list_local()).

%% Spawns an info emitter on each of Nodes and waits for all emitters
%% to terminate.
emit_info_all(Nodes, Items, Ref, AggregatorPid) ->
    Pids = [ spawn_link(Node, rabbit_channel, emit_info_local, [Items, Ref, AggregatorPid]) || Node <- Nodes ],
    rabbit_control_misc:await_emitters_termination(Pids).

%% Emits the requested Items for this node's channels.
emit_info_local(Items, Ref, AggregatorPid) ->
    emit_info(list_local(), Items, Ref, AggregatorPid).

%% Streams each channel's info to AggregatorPid, tolerating channels
%% that exit mid-collection.
emit_info(PidList, InfoItems, Ref, AggregatorPid) ->
    rabbit_control_misc:emitting_map_with_exit_handler(
      AggregatorPid, Ref, fun(C) -> info(C, InfoItems) end, PidList).
+
-spec refresh_config_local() -> 'ok'.

%% Asks every channel on this node to re-read its configuration.
%% Failures are logged and do not abort the sweep.
refresh_config_local() ->
    rabbit_misc:upmap(
      fun (C) ->
              try
                  gen_server2:call(C, refresh_config, infinity)
              catch _:Reason ->
                      rabbit_log:error("Failed to refresh channel config "
                                       "for channel ~p. Reason ~p",
                                       [C, Reason])
              end
      end,
      list_local()),
    ok.

%% Asks every channel on this node to re-read its interceptor setup,
%% with a bounded timeout; failures are logged and skipped.
refresh_interceptors() ->
    rabbit_misc:upmap(
      fun (C) ->
              try
                  gen_server2:call(C, refresh_interceptors, ?REFRESH_TIMEOUT)
              catch _:Reason ->
                      rabbit_log:error("Failed to refresh channel interceptors "
                                       "for channel ~p. Reason ~p",
                                       [C, Reason])
              end
      end,
      list_local()),
    ok.
+
-spec ready_for_close(pid()) -> 'ok'.

%% Signals that the connection is ready for this channel to close.
ready_for_close(Pid) ->
    rabbit_channel_common:ready_for_close(Pid).

-spec force_event_refresh(reference()) -> 'ok'.

% Note: https://www.pivotaltracker.com/story/show/166962656
% This event is necessary for the stats timer to be initialized with
% the correct values once the management agent has started
force_event_refresh(Ref) ->
    [gen_server2:cast(C, {force_event_refresh, Ref}) || C <- list()],
    ok.

%% Returns the channel's per-queue state (exported "For testing").
list_queue_states(Pid) ->
    gen_server2:call(Pid, list_queue_states).

-spec update_user_state(pid(), rabbit_types:auth_user()) -> 'ok' | {error, channel_terminated}.

%% Pushes refreshed user/auth state to a live channel via a plain
%% message; returns an error when the channel process is already dead.
update_user_state(Pid, UserState) when is_pid(Pid) ->
    case erlang:is_process_alive(Pid) of
        true  -> Pid ! {update_user_state, UserState},
                 ok;
        false -> {error, channel_terminated}
    end.
+
+%%---------------------------------------------------------------------------
+
%% gen_server2 init callback. The argument list is assembled by the
%% channel supervisor. The process traps exits (see the 'EXIT' clause of
%% handle_info/2) and joins the rabbit_channels pg_local group so that
%% list_local/0 and the emit_info/refresh helpers can find it.
init([Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, VHost,
      Capabilities, CollectorPid, LimiterPid, AmqpParams]) ->
    process_flag(trap_exit, true),
    ?LG_PROCESS_TYPE(channel),
    ?store_proc_name({ConnName, Channel}),
    ok = pg_local:join(rabbit_channels, self()),
    %% Whether deliveries to queues are subject to credit flow control.
    Flow = case rabbit_misc:get_env(rabbit, mirroring_flow_control, true) of
             true   -> flow;
             false  -> noflow
           end,
    {ok, {Global, Prefetch}} = application:get_env(rabbit, default_consumer_prefetch),
    Limiter0 = rabbit_limiter:new(LimiterPid),
    %% A global default prefetch of 0 means "no limit".
    Limiter = case {Global, Prefetch} of
                  {true, 0} ->
                      rabbit_limiter:unlimit_prefetch(Limiter0);
                  {true, _} ->
                      rabbit_limiter:limit_prefetch(Limiter0, Prefetch, 0);
                  _ ->
                      Limiter0
              end,
    %% Process dictionary is used here because permission cache already uses it. MK.
    put(permission_cache_can_expire, rabbit_access_control:permission_cache_can_expire(User)),
    MaxMessageSize = get_max_message_size(),
    ConsumerTimeout = get_consumer_timeout(),
    OptionalVariables = extract_variable_map_from_amqp_params(AmqpParams),
    {ok, GCThreshold} = application:get_env(rabbit, writer_gc_threshold),
    %% #conf{} holds immutable per-channel configuration; the mutable
    %% parts of the state live directly in #ch{}.
    State = #ch{cfg = #conf{state = starting,
                            protocol = Protocol,
                            channel = Channel,
                            reader_pid = ReaderPid,
                            writer_pid = WriterPid,
                            conn_pid = ConnPid,
                            conn_name = ConnName,
                            user = User,
                            virtual_host = VHost,
                            most_recently_declared_queue = <<>>,
                            queue_collector_pid = CollectorPid,
                            capabilities = Capabilities,
                            trace_state = rabbit_trace:init(VHost),
                            consumer_prefetch = Prefetch,
                            max_message_size = MaxMessageSize,
                            consumer_timeout = ConsumerTimeout,
                            authz_context = OptionalVariables,
                            writer_gc_threshold = GCThreshold
                           },
                limiter = Limiter,
                tx = none,
                next_tag = 1,
                unacked_message_q = ?QUEUE:new(),
                queue_monitors = pmon:new(),
                consumer_mapping = #{},
                queue_consumers = #{},
                confirm_enabled = false,
                publish_seqno = 1,
                unconfirmed = rabbit_confirms:init(),
                rejected = [],
                confirmed = [],
                reply_consumer = none,
                delivery_flow = Flow,
                interceptor_state = undefined,
                queue_states = rabbit_queue_type:init()
               },
    %% The interceptor state is derived from the rest of the state, so it
    %% is filled in as a second step.
    State1 = State#ch{
               interceptor_state = rabbit_channel_interceptor:init(State)},
    State2 = rabbit_event:init_stats_timer(State1, #ch.stats_timer),
    Infos = infos(?CREATION_EVENT_KEYS, State2),
    rabbit_core_metrics:channel_created(self(), Infos),
    rabbit_event:notify(channel_created, Infos),
    rabbit_event:if_enabled(State2, #ch.stats_timer,
                            fun() -> emit_stats(State2) end),
    put_operation_timeout(),
    State3 = init_tick_timer(State2),
    {ok, State3, hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
%% gen_server2 prioritisation: info requests jump ahead of regular
%% traffic so monitoring stays responsive on a busy channel.
prioritise_call(info, _From, _Len, _State)           -> 9;
prioritise_call({info, _Items}, _From, _Len, _State) -> 9;
prioritise_call(_Msg, _From, _Len, _State)           -> 0.
+
%% gen_server2 prioritisation: confirm/reject notifications (bare or
%% wrapped in a queue_event) are handled ahead of ordinary casts so
%% publishers receive acknowledgements promptly.
prioritise_cast({confirm, _MsgSeqNos, _QPid}, _Len, _State)                        -> 5;
prioritise_cast({reject_publish, _MsgSeqNos, _QPid}, _Len, _State)                 -> 5;
prioritise_cast({queue_event, _, {confirm, _MsgSeqNos, _QPid}}, _Len, _State)      -> 5;
prioritise_cast({queue_event, _, {reject_publish, _MsgSeqNos, _QPid}}, _Len, _State) -> 5;
prioritise_cast(_Msg, _Len, _State)                                                -> 0.
+
%% gen_server2 prioritisation: stats emission beats ordinary messages so
%% metrics stay fresh under load.
prioritise_info(emit_stats, _Len, _State) -> 7;
prioritise_info(_Msg, _Len, _State)       -> 0.
+
%% Synchronous API. Note: the bare `Error ->` patterns in the catch
%% sections below only trap thrown terms (class 'throw'), e.g. a timeout
%% thrown by infos/3 when the deadline passes.
handle_call(flush, _From, State) ->
    reply(ok, State);

handle_call({info, Deadline}, _From, State) ->
    try
        reply({ok, infos(?INFO_KEYS, Deadline, State)}, State)
    catch
        Error ->
            reply({error, Error}, State)
    end;

handle_call({{info, Items}, Deadline}, _From, State) ->
    try
        reply({ok, infos(Items, Deadline, State)}, State)
    catch
        Error ->
            reply({error, Error}, State)
    end;

%% Re-read the vhost's tracing configuration (see refresh_config_local/0).
handle_call(refresh_config, _From,
            State = #ch{cfg = #conf{virtual_host = VHost} = Cfg}) ->
    reply(ok, State#ch{cfg = Cfg#conf{trace_state = rabbit_trace:init(VHost)}});

handle_call(refresh_interceptors, _From, State) ->
    IState = rabbit_channel_interceptor:init(State),
    reply(ok, State#ch{interceptor_state = IState});

%% Does the direct reply-to consumer registered on this channel match Key?
handle_call({declare_fast_reply_to, Key}, _From,
            State = #ch{reply_consumer = Consumer}) ->
    reply(case Consumer of
              {_, _, Key} -> exists;
              _           -> not_found
          end, State);

handle_call(list_queue_states, _From, State = #ch{queue_states = QueueStates}) ->
    %% For testing of cleanup only
    %% HACK
    {reply, maps:keys(element(2, QueueStates)), State};

%% NOTE(review): unknown calls are deliberately left unanswered, so the
%% caller blocks until its call timeout expires.
handle_call(_Request, _From, State) ->
    noreply(State).
+
%% Asynchronous API and notifications from queues/reader/writer.
%%
%% AMQP methods forwarded by the reader. Protocol errors raised as
%% #amqp_error{} exits become a channel or connection close via
%% handle_exception/2; anything else crashes the channel with the
%% stacktrace preserved in the stop reason.
handle_cast({method, Method, Content, Flow},
            State = #ch{cfg = #conf{reader_pid = Reader},
                        interceptor_state = IState}) ->
    case Flow of
        %% We are going to process a message from the rabbit_reader
        %% process, so here we ack it. In this case we are accessing
        %% the rabbit_channel process dictionary.
        flow   -> credit_flow:ack(Reader);
        noflow -> ok
    end,
    try handle_method(rabbit_channel_interceptor:intercept_in(
                        expand_shortcuts(Method, State), Content, IState),
                      State) of
        {reply, Reply, NewState} ->
            ok = send(Reply, NewState),
            noreply(NewState);
        {noreply, NewState} ->
            noreply(NewState);
        stop ->
            {stop, normal, State}
    catch
        exit:Reason = #amqp_error{} ->
            MethodName = rabbit_misc:method_record_type(Method),
            handle_exception(Reason#amqp_error{method = MethodName}, State);
        _:Reason:Stacktrace ->
            {stop, {Reason, Stacktrace}, State}
    end;

%% Second half of the close handshake with the reader (see the
%% 'channel.close' clause of handle_method/3): now it is safe to send
%% close_ok and terminate.
handle_cast(ready_for_close,
            State = #ch{cfg = #conf{state = closing,
                                    writer_pid = WriterPid}}) ->
    ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}),
    {stop, normal, State};

handle_cast(terminate, State = #ch{cfg = #conf{writer_pid = WriterPid}}) ->
    ok = rabbit_writer:flush(WriterPid),
    {stop, normal, State};

%% consume_ok additionally starts monitoring the queue behind the new
%% consumer tag.
handle_cast({command, #'basic.consume_ok'{consumer_tag = CTag} = Msg}, State) ->
    ok = send(Msg, State),
    noreply(consumer_monitor(CTag, State));

handle_cast({command, Msg}, State) ->
    ok = send(Msg, State),
    noreply(State);

%% Deliveries racing with channel closure are dropped.
handle_cast({deliver, _CTag, _AckReq, _Msg},
            State = #ch{cfg = #conf{state = closing}}) ->
    noreply(State);
handle_cast({deliver, ConsumerTag, AckRequired, Msg}, State) ->
    % TODO: handle as action
    noreply(handle_deliver(ConsumerTag, AckRequired, Msg, State));

%% Direct reply-to deliveries: dropped when closing, when no reply
%% consumer is registered, or (last clause) when the key does not match
%% the registered consumer.
handle_cast({deliver_reply, _K, _Del},
            State = #ch{cfg = #conf{state = closing}}) ->
    noreply(State);
handle_cast({deliver_reply, _K, _Del}, State = #ch{reply_consumer = none}) ->
    noreply(State);
handle_cast({deliver_reply, Key, #delivery{message =
                    #basic_message{exchange_name = ExchangeName,
                                   routing_keys  = [RoutingKey | _CcRoutes],
                                   content       = Content}}},
            State = #ch{cfg = #conf{writer_pid = WriterPid},
                        next_tag = DeliveryTag,
                        reply_consumer = {ConsumerTag, _Suffix, Key}}) ->
    ok = rabbit_writer:send_command(
           WriterPid,
           #'basic.deliver'{consumer_tag = ConsumerTag,
                            delivery_tag = DeliveryTag,
                            redelivered  = false,
                            exchange     = ExchangeName#resource.name,
                            routing_key  = RoutingKey},
           Content),
    noreply(State);
handle_cast({deliver_reply, _K1, _}, State=#ch{reply_consumer = {_, _, _K2}}) ->
    noreply(State);

handle_cast({send_credit_reply, Len},
            State = #ch{cfg = #conf{writer_pid = WriterPid}}) ->
    ok = rabbit_writer:send_command(
           WriterPid, #'basic.credit_ok'{available = Len}),
    noreply(State);

handle_cast({send_drained, CTagCredit},
            State = #ch{cfg = #conf{writer_pid = WriterPid}}) ->
    [ok = rabbit_writer:send_command(
            WriterPid, #'basic.credit_drained'{consumer_tag   = ConsumerTag,
                                               credit_drained = CreditDrained})
     || {ConsumerTag, CreditDrained} <- CTagCredit],
    noreply(State);

% Note: https://www.pivotaltracker.com/story/show/166962656
% This event is necessary for the stats timer to be initialized with
% the correct values once the management agent has started
handle_cast({force_event_refresh, Ref}, State) ->
    rabbit_event:notify(channel_created, infos(?CREATION_EVENT_KEYS, State),
                        Ref),
    noreply(rabbit_event:init_stats_timer(State, #ch.stats_timer));

handle_cast({mandatory_received, _MsgSeqNo}, State) ->
    %% This feature was used by `rabbit_amqqueue_process` and
    %% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x.
    %% It is unused in 3.8.x and thus deprecated. We keep it to support
    %% in-place upgrades to 3.8.x (i.e. mixed-version clusters), but it
    %% is a no-op starting with that version.
    %%
    %% NB: don't call noreply/1 since we don't want to send confirms.
    noreply_coalesce(State);

%% Pid-addressed confirm/reject casts from pre-queue-type nodes are
%% translated to queue_event form (see the queue_event clause below).
handle_cast({reject_publish, _MsgSeqNo, QPid} = Evt, State) ->
    %% For backwards compatibility
    QRef = find_queue_name_from_pid(QPid, State#ch.queue_states),
    case QRef of
        undefined ->
            %% ignore if no queue could be found for the given pid
            noreply(State);
        _ ->
            handle_cast({queue_event, QRef, Evt}, State)
    end;

handle_cast({confirm, _MsgSeqNo, QPid} = Evt, State) ->
    %% For backwards compatibility
    QRef = find_queue_name_from_pid(QPid, State#ch.queue_states),
    case QRef of
        undefined ->
            %% ignore if no queue could be found for the given pid
            noreply(State);
        _ ->
            handle_cast({queue_event, QRef, Evt}, State)
    end;
%% Generic queue-type event dispatch. 'eol' means the queue is gone:
%% tear down its consumers, settle its pending confirms and drop its
%% state and stats.
handle_cast({queue_event, QRef, Evt},
            #ch{queue_states = QueueStates0} = State0) ->
    case rabbit_queue_type:handle_event(QRef, Evt, QueueStates0) of
        {ok, QState1, Actions} ->
            State1 = State0#ch{queue_states = QState1},
            State = handle_queue_actions(Actions, State1),
            noreply_coalesce(State);
        eol ->
            State1 = handle_consuming_queue_down_or_eol(QRef, State0),
            {ConfirmMXs, UC1} =
                rabbit_confirms:remove_queue(QRef, State1#ch.unconfirmed),
            %% Deleted queue is a special case.
            %% Do not nack the "rejected" messages.
            State2 = record_confirms(ConfirmMXs,
                                     State1#ch{unconfirmed = UC1}),
            erase_queue_stats(QRef),
            noreply_coalesce(
              State2#ch{queue_states = rabbit_queue_type:remove(QRef, QueueStates0)});
        {protocol_error, Type, Reason, ReasonArgs} ->
            rabbit_misc:protocol_error(Type, Reason, ReasonArgs)
    end.
+
%% Raw messages: Ra (quorum queue) events, credit flow, timers, monitor
%% notifications and late replies.
handle_info({ra_event, {Name, _} = From, Evt}, State) ->
    %% For backwards compatibility
    QRef = find_queue_name_from_quorum_name(Name, State#ch.queue_states),
    handle_cast({queue_event, QRef, {From, Evt}}, State);

handle_info({bump_credit, Msg}, State) ->
    %% A rabbit_amqqueue_process is granting credit to our channel. If
    %% our channel was being blocked by this process, and no other
    %% process is blocking our channel, then this channel will be
    %% unblocked. This means that any credit that was deferred will be
    %% sent to rabbit_reader processs that might be blocked by this
    %% particular channel.
    credit_flow:handle_bump_msg(Msg),
    noreply(State);

%% The 0 timeout set by noreply_coalesce/1 fires here; noreply/1 flushes
%% the coalesced confirms/nacks.
handle_info(timeout, State) ->
    noreply(State);

handle_info(emit_stats, State) ->
    emit_stats(State),
    State1 = rabbit_event:reset_stats_timer(State, #ch.stats_timer),
    %% NB: don't call noreply/1 since we don't want to kick off the
    %% stats timer.
    {noreply, send_confirms_and_nacks(State1), hibernate};

%% A monitored queue process died. As with the 'eol' queue_event:
%% settle pending confirms for that queue without nacking rejects, and
%% drop its state and stats.
handle_info({'DOWN', _MRef, process, QPid, Reason},
            #ch{queue_states = QStates0,
                queue_monitors = _QMons} = State0) ->
    credit_flow:peer_down(QPid),
    case rabbit_queue_type:handle_down(QPid, Reason, QStates0) of
        {ok, QState1, Actions} ->
            State1 = State0#ch{queue_states = QState1},
            State = handle_queue_actions(Actions, State1),
            noreply_coalesce(State);
        {eol, QRef} ->
            State1 = handle_consuming_queue_down_or_eol(QRef, State0),
            {ConfirmMXs, UC1} =
                rabbit_confirms:remove_queue(QRef, State1#ch.unconfirmed),
            %% Deleted queue is a special case.
            %% Do not nack the "rejected" messages.
            State2 = record_confirms(ConfirmMXs,
                                     State1#ch{unconfirmed = UC1}),
            erase_queue_stats(QRef),
            noreply_coalesce(
              State2#ch{queue_states = rabbit_queue_type:remove(QRef, QStates0)})
    end;

%% We trap exits (see init/1); a linked process dying takes the channel
%% down with the same reason.
handle_info({'EXIT', _Pid, Reason}, State) ->
    {stop, Reason, State};

%% A reply to a gen_server call that already timed out; log and drop.
handle_info({{Ref, Node}, LateAnswer},
            State = #ch{cfg = #conf{channel = Channel}})
  when is_reference(Ref) ->
    rabbit_log_channel:warning("Channel ~p ignoring late answer ~p from ~p",
                               [Channel, LateAnswer, Node]),
    noreply(State);

%% Periodic housekeeping: expire the permission caches if required and
%% enforce the consumer ack timeout.
handle_info(tick, State0 = #ch{queue_states = QueueStates0}) ->
    case get(permission_cache_can_expire) of
        true  -> ok = clear_permission_cache();
        _     -> ok
    end,
    %% NOTE(review): re-setting queue_states to QueueStates0 below is a
    %% no-op, since it was just read from State0.
    case evaluate_consumer_timeout(State0#ch{queue_states = QueueStates0}) of
        {noreply, State} ->
            noreply(init_tick_timer(reset_tick_timer(State)));
        Return ->
            Return
    end;
%% Sent by update_user_state/2 after e.g. a permissions change.
handle_info({update_user_state, User}, State = #ch{cfg = Cfg}) ->
    noreply(State#ch{cfg = Cfg#conf{user = User}}).
+
+
%% Before hibernating: drop the (cheap to rebuild) permission caches,
%% cancel the tick timer and emit a final stats event tagged with
%% idle_since so management can show the channel as idle.
handle_pre_hibernate(State0) ->
    ok = clear_permission_cache(),
    State = maybe_cancel_tick_timer(State0),
    rabbit_event:if_enabled(
      State, #ch.stats_timer,
      fun () -> emit_stats(State,
                           [{idle_since,
                             os:system_time(milli_seconds)}])
      end),
    {hibernate, rabbit_event:stop_stats_timer(State, #ch.stats_timer)}.
+
%% After waking from hibernation, restart the periodic tick timer that
%% handle_pre_hibernate/1 cancelled.
handle_post_hibernate(State) ->
    {noreply, init_tick_timer(State)}.
+
%% gen_server2 terminate callback: close queue-type resources, notify
%% queues, leave the pg_local group and emit the final stats, metrics
%% and channel_closed events.
terminate(_Reason,
          State = #ch{cfg = #conf{user = #user{username = Username}},
                      queue_states = QueueCtxs}) ->
    _ = rabbit_queue_type:close(QueueCtxs),
    {_Res, _State1} = notify_queues(State),
    pg_local:leave(rabbit_channels, self()),
    rabbit_event:if_enabled(State, #ch.stats_timer,
                            fun() -> emit_stats(State) end),
    %% Per-queue stats are kept in the process dictionary; drop them all.
    [delete_stats(Tag) || {Tag, _} <- get()],
    rabbit_core_metrics:channel_closed(self()),
    rabbit_event:notify(channel_closed, [{pid, self()},
                                         {user_who_performed_action, Username}]).
+
%% Hot code upgrade: no state transformation required.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
+
+format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).
+
-spec get_max_message_size() -> non_neg_integer().

%% The configured per-message size limit, capped at the hard protocol
%% maximum ?MAX_MSG_SIZE; the cap is also the fallback when unset or
%% misconfigured.
get_max_message_size() ->
    case application:get_env(rabbit, max_message_size) of
        {ok, Bytes} when is_integer(Bytes) ->
            erlang:min(Bytes, ?MAX_MSG_SIZE);
        _ ->
            ?MAX_MSG_SIZE
    end.
+
%% The configured consumer acknowledgement timeout (milliseconds), or
%% 'undefined' when unset or not an integer.
get_consumer_timeout() ->
    case application:get_env(rabbit, consumer_timeout) of
        {ok, Timeout} when is_integer(Timeout) ->
            Timeout;
        _ ->
            undefined
    end.
+%%---------------------------------------------------------------------------
+
+reply(Reply, NewState) -> {reply, Reply, next_state(NewState), hibernate}.
+
+noreply(NewState) -> {noreply, next_state(NewState), hibernate}.
+
+next_state(State) -> ensure_stats_timer(send_confirms_and_nacks(State)).
+
%% Like noreply/1 but without flushing confirms/nacks: if any are
%% pending, return a 0 timeout so the loop wakes immediately (the
%% 'timeout' clause of handle_info/2 then flushes them in a batch);
%% otherwise hibernate as usual.
noreply_coalesce(State = #ch{confirmed = C, rejected = R}) ->
    Timeout = case {C, R} of {[], []} -> hibernate; _ -> 0 end,
    {noreply, ensure_stats_timer(State), Timeout}.
+
%% Arm the stats timer (if stats are enabled) to send us 'emit_stats'.
ensure_stats_timer(State) ->
    rabbit_event:ensure_stats_timer(State, #ch.stats_timer, emit_stats).
+
%% Reply with Msg unless the client requested nowait, in which case no
%% response method is sent at all.
return_ok(State, false, Msg)  -> {reply, Msg, State};
return_ok(State, true, _Msg)  -> {noreply, State}.
+
%% Response payload for a method: suppressed when NoWait is true.
ok_msg(false, Msg)  -> Msg;
ok_msg(true, _Msg)  -> undefined.
+
%% Send a command to the client via the writer — unless the channel is
%% already closing, in which case outbound methods are dropped.
send(_Command, #ch{cfg = #conf{state = closing}}) ->
    ok;
send(Command, #ch{cfg = #conf{writer_pid = WriterPid}}) ->
    ok = rabbit_writer:send_command(WriterPid, Command).
+
%% Human-readable rendering of a soft (channel-level) AMQP error.
format_soft_error(#amqp_error{name = N, explanation = E, method = M}) ->
    io_lib:format("operation ~s caused a channel exception ~s: ~ts", [M, N, E]).
+
%% Turn an #amqp_error{} into protocol behaviour: soft errors map to a
%% channel.close sent on this channel; hard errors map to channel 0, in
%% which case the reader is told to close the whole connection.
handle_exception(Reason, State = #ch{cfg = #conf{protocol = Protocol,
                                                 channel = Channel,
                                                 writer_pid = WriterPid,
                                                 reader_pid = ReaderPid,
                                                 conn_pid = ConnPid,
                                                 conn_name = ConnName,
                                                 virtual_host = VHost,
                                                 user = User
                                                }}) ->
    %% something bad's happened: notify_queues may not be 'ok'
    {_Result, State1} = notify_queues(State),
    case rabbit_binary_generator:map_exception(Channel, Reason, Protocol) of
        {Channel, CloseMethod} ->
            rabbit_log_channel:error(
              "Channel error on connection ~p (~s, vhost: '~s',"
              " user: '~s'), channel ~p:~n~s~n",
              [ConnPid, ConnName, VHost, User#user.username,
               Channel, format_soft_error(Reason)]),
            ok = rabbit_writer:send_command(WriterPid, CloseMethod),
            {noreply, State1};
        {0, _} ->
            ReaderPid ! {channel_exit, Channel, Reason},
            {stop, normal, State1}
    end.
+
-spec precondition_failed(string()) -> no_return().

%% Raise a precondition_failed protocol error (never returns).
precondition_failed(Format) -> precondition_failed(Format, []).

-spec precondition_failed(string(), [any()]) -> no_return().

%% As above, with io_lib-style format arguments.
precondition_failed(Format, Params) ->
    rabbit_misc:protocol_error(precondition_failed, Format, Params).
+
%% Build the queue.declare_ok reply (or none, with nowait) and remember
%% this queue as the most recently declared one for '' shortcuts.
return_queue_declare_ok(#resource{name = ActualName},
                        NoWait, MessageCount, ConsumerCount,
                        #ch{cfg = Cfg} = State) ->
    return_ok(State#ch{cfg = Cfg#conf{most_recently_declared_queue = ActualName}},
              NoWait, #'queue.declare_ok'{queue = ActualName,
                                          message_count  = MessageCount,
                                          consumer_count = ConsumerCount}).
+
%% Check a resource permission, memoising successful checks in the
%% process dictionary. The cache keeps at most
%% ?MAX_PERMISSION_CACHE_SIZE entries, newest first; a failed check
%% raises via rabbit_access_control and caches nothing.
check_resource_access(User, Resource, Perm, Context) ->
    Key = {Resource, Context, Perm},
    Cache = case get(permission_cache) of
                undefined -> [];
                Cached    -> Cached
            end,
    case lists:member(Key, Cache) of
        true ->
            ok;
        false ->
            ok = rabbit_access_control:check_resource_access(
                   User, Resource, Perm, Context),
            CacheTail = lists:sublist(Cache, ?MAX_PERMISSION_CACHE_SIZE - 1),
            put(permission_cache, [Key | CacheTail])
    end.
+
%% Drop both memoised permission caches from the process dictionary.
clear_permission_cache() ->
    _ = erase(permission_cache),
    _ = erase(topic_permission_cache),
    ok.
+
%% Convenience wrappers over check_resource_access/4, one per AMQP
%% permission kind.
check_configure_permitted(Resource, User, Context) ->
    check_resource_access(User, Resource, configure, Context).

check_write_permitted(Resource, User, Context) ->
    check_resource_access(User, Resource, write, Context).

check_read_permitted(Resource, User, Context) ->
    check_resource_access(User, Resource, read, Context).
+
%% Convenience wrappers over check_topic_authorisation/5, one per
%% permission kind relevant to topic exchanges.
check_write_permitted_on_topic(Resource, User, RoutingKey, AuthzContext) ->
    check_topic_authorisation(Resource, User, RoutingKey, AuthzContext, write).

check_read_permitted_on_topic(Resource, User, RoutingKey, AuthzContext) ->
    check_topic_authorisation(Resource, User, RoutingKey, AuthzContext, read).
+
%% Validate the user_id message property. Clause order matters: unset is
%% fine; an exact match with the authenticated user is fine; the dummy
%% auth backend bypasses the check; otherwise only users tagged as
%% 'impersonator' may claim a different user_id.
check_user_id_header(#'P_basic'{user_id = undefined}, _) ->
    ok;
check_user_id_header(#'P_basic'{user_id = Username},
                     #ch{cfg = #conf{user = #user{username = Username}}}) ->
    ok;
check_user_id_header(
  #'P_basic'{}, #ch{cfg = #conf{user = #user{authz_backends =
                                                 [{rabbit_auth_backend_dummy, _}]}}}) ->
    ok;
check_user_id_header(#'P_basic'{user_id = Claimed},
                     #ch{cfg = #conf{user = #user{username = Actual,
                                                  tags     = Tags}}}) ->
    case lists:member(impersonator, Tags) of
        true  -> ok;
        false -> precondition_failed(
                   "user_id property set to '~s' but authenticated user was "
                   "'~s'", [Claimed, Actual])
    end.
+
%% Reject publishes whose 'expiration' property does not parse as a
%% valid per-message TTL.
check_expiration_header(Props) ->
    case rabbit_basic:parse_expiration(Props) of
        {error, E} -> precondition_failed("invalid expiration '~s': ~p",
                                          [Props#'P_basic'.expiration, E]);
        {ok, _}    -> ok
    end.
+
%% Clients may not publish directly to internal exchanges.
check_internal_exchange(#exchange{name = Name, internal = true}) ->
    rabbit_misc:protocol_error(access_refused,
                               "cannot publish to internal ~s",
                               [rabbit_misc:rs(Name)]);
check_internal_exchange(_) ->
    ok.
+
%% For topic exchanges only: check routing-key-level authorisation,
%% memoising successful checks in the process dictionary (bounded to
%% ?MAX_PERMISSION_CACHE_SIZE entries). Non-topic exchanges pass.
check_topic_authorisation(#exchange{name = Name = #resource{virtual_host = VHost}, type = topic},
                          User = #user{username = Username},
                          RoutingKey, AuthzContext, Permission) ->
    Resource = Name#resource{kind = topic},
    VariableMap = build_topic_variable_map(AuthzContext, VHost, Username),
    Context = #{routing_key  => RoutingKey,
                variable_map => VariableMap},
    Cache = case get(topic_permission_cache) of
                undefined -> [];
                Other     -> Other
            end,
    case lists:member({Resource, Context, Permission}, Cache) of
        true  -> ok;
        false -> ok = rabbit_access_control:check_topic_access(
                        User, Resource, Permission, Context),
                 CacheTail = lists:sublist(Cache, ?MAX_PERMISSION_CACHE_SIZE-1),
                 put(topic_permission_cache, [{Resource, Context, Permission} | CacheTail])
    end;
check_topic_authorisation(_, _, _, _, _) ->
    ok.
+
+
%% Variable map for topic authorisation: the authz context (a map, or
%% amqp_params to extract one from) plus vhost and username, with the
%% latter two taking precedence on key collision (maps:merge semantics).
build_topic_variable_map(AuthzContext, VHost, Username) when is_map(AuthzContext) ->
    maps:merge(AuthzContext, #{<<"vhost">> => VHost, <<"username">> => Username});
build_topic_variable_map(AuthzContext, VHost, Username) ->
    maps:merge(extract_variable_map_from_amqp_params(AuthzContext), #{<<"vhost">> => VHost, <<"username">> => Username}).
+
%% Use tuple representation of amqp_params to avoid a dependency on amqp_client.
%% Extracts variable map only from amqp_params_direct, not amqp_params_network.
%% amqp_params_direct records are usually used by plugins (e.g. MQTT, STOMP).
%% Any other shape (including amqp_params_network) yields the empty map.
extract_variable_map_from_amqp_params({amqp_params, {amqp_params_direct, _, _, _, _,
                                                     {amqp_adapter_info, _,_,_,_,_,_,AdditionalInfo}, _}}) ->
    proplists:get_value(variable_map, AdditionalInfo, #{});
extract_variable_map_from_amqp_params({amqp_params_direct, _, _, _, _,
                                       {amqp_adapter_info, _,_,_,_,_,_,AdditionalInfo}, _}) ->
    proplists:get_value(variable_map, AdditionalInfo, #{});
extract_variable_map_from_amqp_params([Value]) ->
    extract_variable_map_from_amqp_params(Value);
extract_variable_map_from_amqp_params(_) ->
    #{}.
+
%% Enforce the per-message size limit, opportunistically GCing after
%% measuring large payloads. The error text differs depending on
%% whether the limit is the protocol maximum or an operator setting.
check_msg_size(Content, MaxMessageSize, GCThreshold) ->
    Size = rabbit_basic:maybe_gc_large_msg(Content, GCThreshold),
    case Size > MaxMessageSize of
        true ->
            Fmt = case MaxMessageSize of
                      ?MAX_MSG_SIZE ->
                          "message size ~B is larger than max size ~B";
                      _ ->
                          "message size ~B is larger than configured max size ~B"
                  end,
            precondition_failed(Fmt, [Size, MaxMessageSize]);
        false ->
            ok
    end.
+
%% Refuse queue declaration once the vhost's max-queues limit is hit.
check_vhost_queue_limit(#resource{name = QueueName}, VHost) ->
    case rabbit_vhost_limit:is_over_queue_limit(VHost) of
        false         -> ok;
        {true, Limit} -> precondition_failed("cannot declare queue '~s': "
                                             "queue limit in vhost '~s' (~p) is reached",
                                             [QueueName, VHost, Limit])

    end.
+
%% Build a queue #resource{} from a name binary and vhost.
qbin_to_resource(QueueNameBin, VHostPath) ->
    name_to_resource(queue, QueueNameBin, VHostPath).

%% Generic name binary -> #resource{} of the given kind.
name_to_resource(Type, NameBin, VHostPath) ->
    rabbit_misc:r(VHostPath, Type, NameBin).
+
%% AMQP allows <<>> as a queue name meaning "the most recently declared
%% queue on this channel"; error if nothing has been declared yet.
expand_queue_name_shortcut(<<>>, #ch{cfg = #conf{most_recently_declared_queue = <<>>}}) ->
    rabbit_misc:protocol_error(not_found, "no previously declared queue", []);
expand_queue_name_shortcut(<<>>, #ch{cfg = #conf{most_recently_declared_queue = MRDQ}}) ->
    MRDQ;
expand_queue_name_shortcut(QueueNameBin, _) ->
    QueueNameBin.
+
%% When both queue name and routing key are <<>>, the routing key
%% defaults to the most recently declared queue's name; otherwise the
%% routing key is used as given.
expand_routing_key_shortcut(<<>>, <<>>,
                            #ch{cfg = #conf{most_recently_declared_queue = <<>>}}) ->
    rabbit_misc:protocol_error(not_found, "no previously declared queue", []);
expand_routing_key_shortcut(<<>>, <<>>,
                            #ch{cfg = #conf{most_recently_declared_queue = MRDQ}}) ->
    MRDQ;
expand_routing_key_shortcut(_QueueNameBin, RoutingKey, _State) ->
    RoutingKey.
+
%% Apply the <<>> queue-name / routing-key shortcuts to the methods that
%% support them; all other methods pass through unchanged.
expand_shortcuts(#'basic.get'    {queue = Q} = M, State) ->
    M#'basic.get'    {queue = expand_queue_name_shortcut(Q, State)};
expand_shortcuts(#'basic.consume'{queue = Q} = M, State) ->
    M#'basic.consume'{queue = expand_queue_name_shortcut(Q, State)};
expand_shortcuts(#'queue.delete' {queue = Q} = M, State) ->
    M#'queue.delete' {queue = expand_queue_name_shortcut(Q, State)};
expand_shortcuts(#'queue.purge'  {queue = Q} = M, State) ->
    M#'queue.purge'  {queue = expand_queue_name_shortcut(Q, State)};
expand_shortcuts(#'queue.bind'   {queue = Q, routing_key = K} = M, State) ->
    M#'queue.bind'   {queue       = expand_queue_name_shortcut(Q, State),
                      routing_key = expand_routing_key_shortcut(Q, K, State)};
expand_shortcuts(#'queue.unbind' {queue = Q, routing_key = K} = M, State) ->
    M#'queue.unbind' {queue       = expand_queue_name_shortcut(Q, State),
                      routing_key = expand_routing_key_shortcut(Q, K, State)};
expand_shortcuts(M, _State) ->
    M.
+
%% Operations (declare/delete/bind) are forbidden on the default ("")
%% exchange.
check_not_default_exchange(#resource{kind = exchange, name = <<"">>}) ->
    rabbit_misc:protocol_error(
      access_refused, "operation not permitted on the default exchange", []);
check_not_default_exchange(_) ->
    ok.
+
%% System exchanges (reserved "amq." prefix) may not be deleted.
check_exchange_deletion(XName = #resource{name = <<"amq.", _/binary>>,
                                          kind = exchange}) ->
    rabbit_misc:protocol_error(
      access_refused, "deletion of system ~s not allowed",
      [rabbit_misc:rs(XName)]);
check_exchange_deletion(_) ->
    ok.
+
%% check that an exchange/queue name does not contain the reserved
%% "amq." prefix.
%%
%% As per the AMQP 0-9-1 spec, the exclusion of "amq." prefixed names
%% only applies on actual creation, and not in the cases where the
%% entity already exists or passive=true.
%%
%% NB: We deliberately do not enforce the other constraints on names
%% required by the spec.
%%
%% Returns the (unchanged) name on success, for use in pipelines.
check_name(Kind, NameBin = <<"amq.", _/binary>>) ->
    rabbit_misc:protocol_error(
      access_refused,
      "~s name '~s' contains reserved prefix 'amq.*'",[Kind, NameBin]);
check_name(_Kind, NameBin) ->
    NameBin.
+
%% Remove every CR and LF byte from a name binary.
strip_cr_lf(Bin) ->
    binary:replace(Bin, [<<"\n">>, <<"\r">>], <<"">>, [global]).
+
+
%% If the message's reply_to is the direct reply-to pseudo queue,
%% rewrite it to the channel-specific suffixed name so the responder can
%% route straight back; requires a registered reply consumer. Any
%% encoded properties are cleared since the content was mutated.
maybe_set_fast_reply_to(
  C = #content{properties = P = #'P_basic'{reply_to =
                                               <<"amq.rabbitmq.reply-to">>}},
  #ch{reply_consumer = ReplyConsumer}) ->
    case ReplyConsumer of
        none         -> rabbit_misc:protocol_error(
                          precondition_failed,
                          "fast reply consumer does not exist", []);
        {_, Suf, _K} -> Rep = <<"amq.rabbitmq.reply-to.", Suf/binary>>,
                        rabbit_binary_generator:clear_encoded_content(
                          C#content{properties = P#'P_basic'{reply_to = Rep}})
    end;
maybe_set_fast_reply_to(C, _State) ->
    C.
+
%% Accumulate rejected message/exchange pairs for later nacking; a
%% reject inside an open transaction also marks the transaction failed.
record_rejects([], State) ->
    State;
record_rejects(MXs, State = #ch{rejected = R, tx = Tx}) ->
    Tx1 = case Tx of
              none -> none;
              _    -> failed
          end,
    State#ch{rejected = [MXs | R], tx = Tx1}.
+
%% Accumulate confirmed message/exchange pairs; they are flushed to the
%% client by send_confirms_and_nacks (via next_state/noreply_coalesce).
record_confirms([], State) ->
    State;
record_confirms(MXs, State = #ch{confirmed = C}) ->
    State#ch{confirmed = [MXs | C]}.
+
%% Entry point from the interceptor pipeline: unpack {Method, Content}.
handle_method({Method, Content}, State) ->
    handle_method(Method, Content, State).
+
+handle_method(#'channel.open'{}, _,
+ State = #ch{cfg = #conf{state = starting} = Cfg}) ->
+ %% Don't leave "starting" as the state for 5s. TODO is this TRTTD?
+ State1 = State#ch{cfg = Cfg#conf{state = running}},
+ rabbit_event:if_enabled(State1, #ch.stats_timer,
+ fun() -> emit_stats(State1) end),
+ {reply, #'channel.open_ok'{}, State1};
+
+handle_method(#'channel.open'{}, _, _State) ->
+ rabbit_misc:protocol_error(
+ channel_error, "second 'channel.open' seen", []);
+
+handle_method(_Method, _, #ch{cfg = #conf{state = starting}}) ->
+ rabbit_misc:protocol_error(channel_error, "expected 'channel.open'", []);
+
+handle_method(#'channel.close_ok'{}, _, #ch{cfg = #conf{state = closing}}) ->
+ stop;
+
+handle_method(#'channel.close'{}, _,
+ State = #ch{cfg = #conf{state = closing,
+ writer_pid = WriterPid}}) ->
+ ok = rabbit_writer:send_command(WriterPid, #'channel.close_ok'{}),
+ {noreply, State};
+
+handle_method(_Method, _, State = #ch{cfg = #conf{state = closing}}) ->
+ {noreply, State};
+
+handle_method(#'channel.close'{}, _,
+ State = #ch{cfg = #conf{reader_pid = ReaderPid}}) ->
+ {_Result, State1} = notify_queues(State),
+ %% We issue the channel.close_ok response after a handshake with
+ %% the reader, the other half of which is ready_for_close. That
+ %% way the reader forgets about the channel before we send the
+ %% response (and this channel process terminates). If we didn't do
+ %% that, a channel.open for the same channel number, which a
+ %% client is entitled to send as soon as it has received the
+ %% close_ok, might be received by the reader before it has seen
+ %% the termination and hence be sent to the old, now dead/dying
+ %% channel process, instead of a new process, and thus lost.
+ ReaderPid ! {channel_closing, self()},
+ {noreply, State1};
+
+%% Even though the spec prohibits the client from sending commands
+%% while waiting for the reply to a synchronous command, we generally
+%% do allow this...except in the case of a pending tx.commit, where
+%% it could wreak havoc.
+handle_method(_Method, _, #ch{tx = Tx})
+ when Tx =:= committing orelse Tx =:= failed ->
+ rabbit_misc:protocol_error(
+ channel_error, "unexpected command while processing 'tx.commit'", []);
+
+handle_method(#'access.request'{},_, State) ->
+ {reply, #'access.request_ok'{ticket = 1}, State};
+
+handle_method(#'basic.publish'{immediate = true}, _Content, _State) ->
+ rabbit_misc:protocol_error(not_implemented, "immediate=true", []);
+
+handle_method(#'basic.publish'{exchange = ExchangeNameBin,
+ routing_key = RoutingKey,
+ mandatory = Mandatory},
+ Content, State = #ch{cfg = #conf{channel = ChannelNum,
+ conn_name = ConnName,
+ virtual_host = VHostPath,
+ user = #user{username = Username} = User,
+ trace_state = TraceState,
+ max_message_size = MaxMessageSize,
+ authz_context = AuthzContext,
+ writer_gc_threshold = GCThreshold
+ },
+ tx = Tx,
+ confirm_enabled = ConfirmEnabled,
+ delivery_flow = Flow
+ }) ->
+ check_msg_size(Content, MaxMessageSize, GCThreshold),
+ ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
+ check_write_permitted(ExchangeName, User, AuthzContext),
+ Exchange = rabbit_exchange:lookup_or_die(ExchangeName),
+ check_internal_exchange(Exchange),
+ check_write_permitted_on_topic(Exchange, User, RoutingKey, AuthzContext),
+ %% We decode the content's properties here because we're almost
+ %% certain to want to look at delivery-mode and priority.
+ DecodedContent = #content {properties = Props} =
+ maybe_set_fast_reply_to(
+ rabbit_binary_parser:ensure_content_decoded(Content), State),
+ check_user_id_header(Props, State),
+ check_expiration_header(Props),
+ DoConfirm = Tx =/= none orelse ConfirmEnabled,
+ {MsgSeqNo, State1} =
+ case DoConfirm orelse Mandatory of
+ false -> {undefined, State};
+ true -> SeqNo = State#ch.publish_seqno,
+ {SeqNo, State#ch{publish_seqno = SeqNo + 1}}
+ end,
+ case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of
+ {ok, Message} ->
+ Delivery = rabbit_basic:delivery(
+ Mandatory, DoConfirm, Message, MsgSeqNo),
+ QNames = rabbit_exchange:route(Exchange, Delivery),
+ rabbit_trace:tap_in(Message, QNames, ConnName, ChannelNum,
+ Username, TraceState),
+ DQ = {Delivery#delivery{flow = Flow}, QNames},
+ {noreply, case Tx of
+ none -> deliver_to_queues(DQ, State1);
+ {Msgs, Acks} -> Msgs1 = ?QUEUE:in(DQ, Msgs),
+ State1#ch{tx = {Msgs1, Acks}}
+ end};
+ {error, Reason} ->
+ precondition_failed("invalid message: ~p", [Reason])
+ end;
+
+handle_method(#'basic.nack'{delivery_tag = DeliveryTag,
+ multiple = Multiple,
+ requeue = Requeue}, _, State) ->
+ reject(DeliveryTag, Requeue, Multiple, State);
+
+handle_method(#'basic.ack'{delivery_tag = DeliveryTag,
+ multiple = Multiple},
+ _, State = #ch{unacked_message_q = UAMQ, tx = Tx}) ->
+ {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple),
+ State1 = State#ch{unacked_message_q = Remaining},
+ {noreply, case Tx of
+ none -> {State2, Actions} = ack(Acked, State1),
+ handle_queue_actions(Actions, State2);
+ {Msgs, Acks} -> Acks1 = ack_cons(ack, Acked, Acks),
+ State1#ch{tx = {Msgs, Acks1}}
+ end};
+
+handle_method(#'basic.get'{queue = QueueNameBin, no_ack = NoAck},
+ _, State = #ch{cfg = #conf{writer_pid = WriterPid,
+ conn_pid = ConnPid,
+ user = User,
+ virtual_host = VHostPath,
+ authz_context = AuthzContext
+ },
+ limiter = Limiter,
+ next_tag = DeliveryTag,
+ queue_states = QueueStates0}) ->
+ QueueName = qbin_to_resource(QueueNameBin, VHostPath),
+ check_read_permitted(QueueName, User, AuthzContext),
+ case rabbit_amqqueue:with_exclusive_access_or_die(
+ QueueName, ConnPid,
+ %% Use the delivery tag as consumer tag for quorum queues
+ fun (Q) ->
+ rabbit_queue_type:dequeue(
+ Q, NoAck, rabbit_limiter:pid(Limiter),
+ DeliveryTag, QueueStates0)
+ end) of
+ {ok, MessageCount, Msg, QueueStates} ->
+ handle_basic_get(WriterPid, DeliveryTag, NoAck, MessageCount, Msg,
+ State#ch{queue_states = QueueStates});
+ {empty, QueueStates} ->
+ ?INCR_STATS(queue_stats, QueueName, 1, get_empty, State),
+ {reply, #'basic.get_empty'{}, State#ch{queue_states = QueueStates}};
+ empty ->
+ ?INCR_STATS(queue_stats, QueueName, 1, get_empty, State),
+ {reply, #'basic.get_empty'{}, State};
+ {error, {unsupported, single_active_consumer}} ->
+ rabbit_misc:protocol_error(
+ resource_locked,
+ "cannot obtain access to locked ~s. basic.get operations "
+ "are not supported by quorum queues with single active consumer",
+ [rabbit_misc:rs(QueueName)]);
+ {error, Reason} ->
+ %% TODO add queue type to error message
+ rabbit_misc:protocol_error(internal_error,
+ "Cannot get a message from queue '~s': ~p",
+ [rabbit_misc:rs(QueueName), Reason]);
+ {protocol_error, Type, Reason, ReasonArgs} ->
+ rabbit_misc:protocol_error(Type, Reason, ReasonArgs)
+ end;
+
+%% basic.consume on the pseudo-queue amq.rabbitmq.reply-to (direct
+%% reply-to). No real queue is involved: the channel records a single
+%% reply consumer whose routing key encodes this channel's pid plus a
+%% random key. The consumer must use no_ack and only one may exist.
+handle_method(#'basic.consume'{queue = <<"amq.rabbitmq.reply-to">>,
+ consumer_tag = CTag0,
+ no_ack = NoAck,
+ nowait = NoWait},
+ _, State = #ch{reply_consumer = ReplyConsumer,
+ consumer_mapping = ConsumerMapping}) ->
+ case maps:find(CTag0, ConsumerMapping) of
+ error ->
+ case {ReplyConsumer, NoAck} of
+ {none, true} ->
+ %% An empty tag means the server generates one.
+ CTag = case CTag0 of
+ <<>> -> rabbit_guid:binary(
+ rabbit_guid:gen_secure(), "amq.ctag");
+ Other -> Other
+ end,
+ %% Precalculate both suffix and key; base64 encoding is
+ %% expensive
+ Key = base64:encode(rabbit_guid:gen_secure()),
+ PidEnc = base64:encode(term_to_binary(self())),
+ Suffix = <<PidEnc/binary, ".", Key/binary>>,
+ Consumer = {CTag, Suffix, binary_to_list(Key)},
+ State1 = State#ch{reply_consumer = Consumer},
+ case NoWait of
+ true -> {noreply, State1};
+ false -> Rep = #'basic.consume_ok'{consumer_tag = CTag},
+ {reply, Rep, State1}
+ end;
+ {_, false} ->
+ rabbit_misc:protocol_error(
+ precondition_failed,
+ "reply consumer cannot acknowledge", []);
+ _ ->
+ rabbit_misc:protocol_error(
+ precondition_failed, "reply consumer already set", [])
+ end;
+ {ok, _} ->
+ %% Attempted reuse of consumer tag.
+ rabbit_misc:protocol_error(
+ not_allowed, "attempt to reuse consumer tag '~s'", [CTag0])
+ end;
+
+%% basic.cancel for the direct reply-to pseudo consumer: drop the
+%% registered reply consumer and, unless nowait was set, confirm with
+%% basic.cancel_ok. Matches only when the tag is the reply consumer's.
+handle_method(#'basic.cancel'{consumer_tag = ConsumerTag, nowait = NoWait},
+ _, State = #ch{reply_consumer = {ConsumerTag, _, _}}) ->
+ Cleared = State#ch{reply_consumer = none},
+ case NoWait of
+ false -> {reply, #'basic.cancel_ok'{consumer_tag = ConsumerTag}, Cleared};
+ true -> {noreply, Cleared}
+ end;
+
+%% basic.consume: register a subscription on a queue. Requires read
+%% permission; rejects reuse of an already-registered consumer tag and
+%% generates a tag when the client supplies an empty one. The actual
+%% registration is delegated to basic_consume/8.
+handle_method(#'basic.consume'{queue = QueueNameBin,
+ consumer_tag = ConsumerTag,
+ no_local = _, % FIXME: implement
+ no_ack = NoAck,
+ exclusive = ExclusiveConsume,
+ nowait = NoWait,
+ arguments = Args},
+ _, State = #ch{cfg = #conf{consumer_prefetch = ConsumerPrefetch,
+ user = User,
+ virtual_host = VHostPath,
+ authz_context = AuthzContext},
+ consumer_mapping = ConsumerMapping
+ }) ->
+ case maps:find(ConsumerTag, ConsumerMapping) of
+ error ->
+ QueueName = qbin_to_resource(QueueNameBin, VHostPath),
+ check_read_permitted(QueueName, User, AuthzContext),
+ ActualConsumerTag =
+ case ConsumerTag of
+ <<>> -> rabbit_guid:binary(rabbit_guid:gen_secure(),
+ "amq.ctag");
+ Other -> Other
+ end,
+ case basic_consume(
+ QueueName, NoAck, ConsumerPrefetch, ActualConsumerTag,
+ ExclusiveConsume, Args, NoWait, State) of
+ {ok, State1} ->
+ {noreply, State1};
+ {error, exclusive_consume_unavailable} ->
+ rabbit_misc:protocol_error(
+ access_refused, "~s in exclusive use",
+ [rabbit_misc:rs(QueueName)]);
+ {error, global_qos_not_supported_for_queue_type} ->
+ rabbit_misc:protocol_error(
+ not_implemented, "~s does not support global qos",
+ [rabbit_misc:rs(QueueName)])
+ end;
+ {ok, _} ->
+ %% Attempted reuse of consumer tag.
+ rabbit_misc:protocol_error(
+ not_allowed, "attempt to reuse consumer tag '~s'", [ConsumerTag])
+ end;
+
+%% basic.cancel for a regular consumer: remove the tag from the channel's
+%% bookkeeping (consumer_mapping and the per-queue tag index) and ask the
+%% queue to emit cancel_ok on our behalf. Per the AMQP spec, cancelling an
+%% unknown tag is silently acknowledged.
+handle_method(#'basic.cancel'{consumer_tag = ConsumerTag, nowait = NoWait},
+ _, State = #ch{cfg = #conf{user = #user{username = Username}},
+ consumer_mapping = ConsumerMapping,
+ queue_consumers = QCons,
+ queue_states = QueueStates0}) ->
+ OkMsg = #'basic.cancel_ok'{consumer_tag = ConsumerTag},
+ case maps:find(ConsumerTag, ConsumerMapping) of
+ error ->
+ %% Spec requires we ignore this situation.
+ return_ok(State, NoWait, OkMsg);
+ {ok, {Q, _CParams}} when ?is_amqqueue(Q) ->
+ QName = amqqueue:get_name(Q),
+
+ ConsumerMapping1 = maps:remove(ConsumerTag, ConsumerMapping),
+ %% Drop the tag from the queue's tag set; remove the queue's
+ %% entry entirely once its last consumer is gone.
+ QCons1 =
+ case maps:find(QName, QCons) of
+ error -> QCons;
+ {ok, CTags} -> CTags1 = gb_sets:delete(ConsumerTag, CTags),
+ case gb_sets:is_empty(CTags1) of
+ true -> maps:remove(QName, QCons);
+ false -> maps:put(QName, CTags1, QCons)
+ end
+ end,
+ NewState = State#ch{consumer_mapping = ConsumerMapping1,
+ queue_consumers = QCons1},
+ %% In order to ensure that no more messages are sent to
+ %% the consumer after the cancel_ok has been sent, we get
+ %% the queue process to send the cancel_ok on our
+ %% behalf. If we were sending the cancel_ok ourselves it
+ %% might overtake a message sent previously by the queue.
+ case rabbit_misc:with_exit_handler(
+ fun () -> {error, not_found} end,
+ fun () ->
+ rabbit_queue_type:cancel(
+ Q, ConsumerTag, ok_msg(NoWait, OkMsg),
+ Username, QueueStates0)
+ end) of
+ {ok, QueueStates} ->
+ {noreply, NewState#ch{queue_states = QueueStates}};
+ {error, not_found} ->
+ %% Spec requires we ignore this situation.
+ return_ok(NewState, NoWait, OkMsg)
+ end
+ end;
+
+%% basic.qos: prefetch_size is not implemented (only prefetch_count).
+handle_method(#'basic.qos'{prefetch_size = Size}, _, _State) when Size /= 0 ->
+ rabbit_misc:protocol_error(not_implemented,
+ "prefetch_size!=0 (~w)", [Size]);
+
+%% Per-consumer prefetch (global=false): stored in the channel config and
+%% applied to consumers created afterwards; does not go via the limiter.
+handle_method(#'basic.qos'{global = false,
+ prefetch_count = PrefetchCount},
+ _, State = #ch{cfg = Cfg,
+ limiter = Limiter}) ->
+ %% Ensures that if default was set, it's overridden
+ Limiter1 = rabbit_limiter:unlimit_prefetch(Limiter),
+ {reply, #'basic.qos_ok'{}, State#ch{cfg = Cfg#conf{consumer_prefetch = PrefetchCount},
+ limiter = Limiter1}};
+
+%% Channel-wide prefetch of 0 (global=true) means "unlimited".
+handle_method(#'basic.qos'{global = true,
+ prefetch_count = 0},
+ _, State = #ch{limiter = Limiter}) ->
+ Limiter1 = rabbit_limiter:unlimit_prefetch(Limiter),
+ {reply, #'basic.qos_ok'{}, State#ch{limiter = Limiter1}};
+
+%% Channel-wide prefetch (global=true, non-zero): enforced by the limiter,
+%% seeded with the current number of unacked deliveries. If this call
+%% activates a previously-inactive limiter, classic queues are told so
+%% they start obeying it.
+handle_method(#'basic.qos'{global = true,
+ prefetch_count = PrefetchCount},
+ _, State = #ch{limiter = Limiter, unacked_message_q = UAMQ}) ->
+ %% TODO ?QUEUE:len(UAMQ) is not strictly right since that counts
+ %% unacked messages from basic.get too. Pretty obscure though.
+ Limiter1 = rabbit_limiter:limit_prefetch(Limiter,
+ PrefetchCount, ?QUEUE:len(UAMQ)),
+ case ((not rabbit_limiter:is_active(Limiter)) andalso
+ rabbit_limiter:is_active(Limiter1)) of
+ true -> rabbit_amqqueue:activate_limit_all(
+ classic_consumer_queue_pids(State#ch.consumer_mapping), self());
+ false -> ok
+ end,
+ {reply, #'basic.qos_ok'{}, State#ch{limiter = Limiter1}};
+
+%% basic.recover_async{requeue=true}: requeue every unacked delivery,
+%% grouped per (queue, consumer tag). Queue exits during the requeue are
+%% swallowed (with_exit_handler) so one dead queue cannot abort recovery
+%% of the rest.
+handle_method(#'basic.recover_async'{requeue = true},
+ _, State = #ch{unacked_message_q = UAMQ,
+ limiter = Limiter,
+ queue_states = QueueStates0}) ->
+ OkFun = fun () -> ok end,
+ UAMQL = ?QUEUE:to_list(UAMQ),
+ {QueueStates, Actions} =
+ foreach_per_queue(
+ fun ({QPid, CTag}, MsgIds, {Acc0, Actions0}) ->
+ rabbit_misc:with_exit_handler(
+ OkFun,
+ fun () ->
+ {ok, Acc, Act} = rabbit_amqqueue:requeue(QPid, {CTag, MsgIds}, Acc0),
+ {Acc, Act ++ Actions0}
+ end)
+ end, lists:reverse(UAMQL), {QueueStates0, []}),
+ ok = notify_limiter(Limiter, UAMQL),
+ State1 = handle_queue_actions(Actions, State#ch{unacked_message_q = ?QUEUE:new(),
+ queue_states = QueueStates}),
+ %% No answer required - basic.recover is the newer, synchronous
+ %% variant of this method
+ {noreply, State1};
+
+handle_method(#'basic.recover_async'{requeue = false}, _, _State) ->
+ rabbit_misc:protocol_error(not_implemented, "requeue=false", []);
+
+%% basic.recover: same as recover_async but confirmed with recover_ok.
+handle_method(#'basic.recover'{requeue = Requeue}, Content, State) ->
+ {noreply, State1} = handle_method(#'basic.recover_async'{requeue = Requeue},
+ Content, State),
+ {reply, #'basic.recover_ok'{}, State1};
+
+%% basic.reject: single-message reject, delegated to reject/4.
+handle_method(#'basic.reject'{delivery_tag = DeliveryTag, requeue = Requeue},
+ _, State) ->
+ reject(DeliveryTag, Requeue, false, State);
+
+%% exchange.declare/delete/bind/unbind: authorisation and the actual work
+%% are delegated to handle_method/6; these clauses only unpack channel
+%% config and send the protocol reply (suppressed when nowait is set).
+handle_method(#'exchange.declare'{nowait = NoWait} = Method,
+ _, State = #ch{cfg = #conf{virtual_host = VHostPath,
+ user = User,
+ queue_collector_pid = CollectorPid,
+ conn_pid = ConnPid,
+ authz_context = AuthzContext}}) ->
+ handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User),
+ return_ok(State, NoWait, #'exchange.declare_ok'{});
+
+handle_method(#'exchange.delete'{nowait = NoWait} = Method,
+ _, State = #ch{cfg = #conf{conn_pid = ConnPid,
+ authz_context = AuthzContext,
+ virtual_host = VHostPath,
+ queue_collector_pid = CollectorPid,
+ user = User}}) ->
+ handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User),
+ return_ok(State, NoWait, #'exchange.delete_ok'{});
+
+handle_method(#'exchange.bind'{nowait = NoWait} = Method,
+ _, State = #ch{cfg = #conf{virtual_host = VHostPath,
+ conn_pid = ConnPid,
+ authz_context = AuthzContext,
+ queue_collector_pid = CollectorPid,
+ user = User}}) ->
+ handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User),
+ return_ok(State, NoWait, #'exchange.bind_ok'{});
+
+handle_method(#'exchange.unbind'{nowait = NoWait} = Method,
+ _, State = #ch{cfg = #conf{virtual_host = VHostPath,
+ conn_pid = ConnPid,
+ authz_context = AuthzContext,
+ queue_collector_pid = CollectorPid,
+ user = User}}) ->
+ handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User),
+ return_ok(State, NoWait, #'exchange.unbind_ok'{});
+
+%% queue.declare/delete/bind/unbind/purge: like the exchange.* clauses,
+%% the real work lives in handle_method/6; these unpack config, forward,
+%% and build the reply (declare_ok carries message/consumer counts,
+%% delete_ok and purge_ok carry the purged message count).
+handle_method(#'queue.declare'{nowait = NoWait} = Method,
+ _, State = #ch{cfg = #conf{virtual_host = VHostPath,
+ conn_pid = ConnPid,
+ authz_context = AuthzContext,
+ queue_collector_pid = CollectorPid,
+ user = User}}) ->
+ {ok, QueueName, MessageCount, ConsumerCount} =
+ handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User),
+ return_queue_declare_ok(QueueName, NoWait, MessageCount,
+ ConsumerCount, State);
+
+handle_method(#'queue.delete'{nowait = NoWait} = Method, _,
+ State = #ch{cfg = #conf{conn_pid = ConnPid,
+ authz_context = AuthzContext,
+ virtual_host = VHostPath,
+ queue_collector_pid = CollectorPid,
+ user = User}}) ->
+ {ok, PurgedMessageCount} =
+ handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User),
+ return_ok(State, NoWait,
+ #'queue.delete_ok'{message_count = PurgedMessageCount});
+
+handle_method(#'queue.bind'{nowait = NoWait} = Method, _,
+ State = #ch{cfg = #conf{conn_pid = ConnPid,
+ authz_context = AuthzContext,
+ user = User,
+ queue_collector_pid = CollectorPid,
+ virtual_host = VHostPath}}) ->
+ handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User),
+ return_ok(State, NoWait, #'queue.bind_ok'{});
+
+%% queue.unbind has no nowait field in AMQP 0-9-1, hence 'false' below.
+handle_method(#'queue.unbind'{} = Method, _,
+ State = #ch{cfg = #conf{conn_pid = ConnPid,
+ authz_context = AuthzContext,
+ user = User,
+ queue_collector_pid = CollectorPid,
+ virtual_host = VHostPath}}) ->
+ handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User),
+ return_ok(State, false, #'queue.unbind_ok'{});
+
+handle_method(#'queue.purge'{nowait = NoWait} = Method,
+ _, State = #ch{cfg = #conf{conn_pid = ConnPid,
+ authz_context = AuthzContext,
+ user = User,
+ queue_collector_pid = CollectorPid,
+ virtual_host = VHostPath}}) ->
+ case handle_method(Method, ConnPid, AuthzContext, CollectorPid,
+ VHostPath, User) of
+ {ok, PurgedMessageCount} ->
+ return_ok(State, NoWait,
+ #'queue.purge_ok'{message_count = PurgedMessageCount})
+ end;
+
+%% tx.select: switch the channel into transactional mode. Mutually
+%% exclusive with confirm mode; re-selecting is a no-op.
+handle_method(#'tx.select'{}, _, #ch{confirm_enabled = true}) ->
+ precondition_failed("cannot switch from confirm to tx mode");
+
+handle_method(#'tx.select'{}, _, State = #ch{tx = none}) ->
+ {reply, #'tx.select_ok'{}, State#ch{tx = new_tx()}};
+
+handle_method(#'tx.select'{}, _, State) ->
+ {reply, #'tx.select_ok'{}, State};
+
+handle_method(#'tx.commit'{}, _, #ch{tx = none}) ->
+ precondition_failed("channel is not transactional");
+
+%% tx.commit: replay buffered publishes, then apply buffered acks and
+%% rejects (each batch re-sorted oldest-first via Rev). The commit_ok is
+%% deferred by maybe_complete_tx until all publish confirms are in.
+handle_method(#'tx.commit'{}, _, State = #ch{tx = {Msgs, Acks},
+ limiter = Limiter}) ->
+ State1 = queue_fold(fun deliver_to_queues/2, State, Msgs),
+ Rev = fun (X) -> lists:reverse(lists:sort(X)) end,
+ {State2, Actions2} =
+ lists:foldl(fun ({ack, A}, {Acc, Actions}) ->
+ {Acc0, Actions0} = ack(Rev(A), Acc),
+ {Acc0, Actions ++ Actions0};
+ ({Requeue, A}, {Acc, Actions}) ->
+ {Acc0, Actions0} = internal_reject(Requeue, Rev(A), Limiter, Acc),
+ {Acc0, Actions ++ Actions0}
+ end, {State1, []}, lists:reverse(Acks)),
+ State3 = handle_queue_actions(Actions2, State2),
+ {noreply, maybe_complete_tx(State3#ch{tx = committing})};
+
+handle_method(#'tx.rollback'{}, _, #ch{tx = none}) ->
+ precondition_failed("channel is not transactional");
+
+%% tx.rollback: drop buffered publishes and push buffered acks/rejects
+%% back into the unacked queue (deduplicated and re-sorted by usort).
+handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q = UAMQ,
+ tx = {_Msgs, Acks}}) ->
+ AcksL = lists:append(lists:reverse([lists:reverse(L) || {_, L} <- Acks])),
+ UAMQ1 = ?QUEUE:from_list(lists:usort(AcksL ++ ?QUEUE:to_list(UAMQ))),
+ {reply, #'tx.rollback_ok'{}, State#ch{unacked_message_q = UAMQ1,
+ tx = new_tx()}};
+
+%% confirm.select: enable publisher confirms (incompatible with tx mode).
+handle_method(#'confirm.select'{}, _, #ch{tx = {_, _}}) ->
+ precondition_failed("cannot switch from tx to confirm mode");
+
+handle_method(#'confirm.select'{nowait = NoWait}, _, State) ->
+ return_ok(State#ch{confirm_enabled = true},
+ NoWait, #'confirm.select_ok'{});
+
+%% channel.flow: only the (trivial) active=true direction is supported.
+handle_method(#'channel.flow'{active = true}, _, State) ->
+ {reply, #'channel.flow_ok'{active = true}, State};
+
+handle_method(#'channel.flow'{active = false}, _, _State) ->
+ rabbit_misc:protocol_error(not_implemented, "active=false", []);
+
+%% basic.credit (RabbitMQ extension): grant link credit to a consumer,
+%% forwarded to the queue-type implementation for that consumer's queue.
+handle_method(#'basic.credit'{consumer_tag = CTag,
+ credit = Credit,
+ drain = Drain},
+ _, State = #ch{consumer_mapping = Consumers,
+ queue_states = QStates0}) ->
+ case maps:find(CTag, Consumers) of
+ {ok, {Q, _CParams}} ->
+ {ok, QStates, Actions} = rabbit_queue_type:credit(Q, CTag, Credit, Drain, QStates0),
+ {noreply, handle_queue_actions(Actions, State#ch{queue_states = QStates})};
+ error -> precondition_failed(
+ "unknown consumer tag '~s'", [CTag])
+ end;
+
+%% Catch-all: any method not matched above is not implemented here.
+handle_method(_MethodRecord, _Content, _State) ->
+ rabbit_misc:protocol_error(
+ command_invalid, "unimplemented method", []).
+
+%%----------------------------------------------------------------------------
+
+%% We get the queue process to send the consume_ok on our behalf. This
+%% is for symmetry with basic.cancel - see the comment in that method
+%% for why.
+%%
+%% Registers a consumer on QueueName (holding the connection's exclusive
+%% access check) and records it in consumer_mapping together with the
+%% parameters needed to re-establish it after a failover. Returns
+%% {ok, State} or {error, exclusive_consume_unavailable |
+%% global_qos_not_supported_for_queue_type}; protocol errors from the
+%% queue type are raised directly.
+basic_consume(QueueName, NoAck, ConsumerPrefetch, ActualConsumerTag,
+ ExclusiveConsume, Args, NoWait,
+ State = #ch{cfg = #conf{conn_pid = ConnPid,
+ user = #user{username = Username}},
+ limiter = Limiter,
+ consumer_mapping = ConsumerMapping,
+ queue_states = QueueStates0}) ->
+ case rabbit_amqqueue:with_exclusive_access_or_die(
+ QueueName, ConnPid,
+ fun (Q) ->
+ {rabbit_amqqueue:basic_consume(
+ Q, NoAck, self(),
+ rabbit_limiter:pid(Limiter),
+ rabbit_limiter:is_active(Limiter),
+ ConsumerPrefetch, ActualConsumerTag,
+ ExclusiveConsume, Args,
+ ok_msg(NoWait, #'basic.consume_ok'{
+ consumer_tag = ActualConsumerTag}),
+ Username, QueueStates0),
+ Q}
+ end) of
+ {{ok, QueueStates, Actions}, Q} when ?is_amqqueue(Q) ->
+ CM1 = maps:put(
+ ActualConsumerTag,
+ {Q, {NoAck, ConsumerPrefetch, ExclusiveConsume, Args}},
+ ConsumerMapping),
+
+ State1 = State#ch{consumer_mapping = CM1,
+ queue_states = QueueStates},
+ State2 = handle_queue_actions(Actions, State1),
+ %% NOTE(review): the queue is only monitored here in the nowait
+ %% case; presumably the wait path monitors on consume_ok receipt
+ %% elsewhere — confirm against the rest of the module.
+ {ok, case NoWait of
+ true -> consumer_monitor(ActualConsumerTag, State2);
+ false -> State2
+ end};
+ {{error, exclusive_consume_unavailable} = E, _Q} ->
+ E;
+ {{error, global_qos_not_supported_for_queue_type} = E, _Q} ->
+ E;
+ {{protocol_error, Type, Reason, ReasonArgs}, _Q} ->
+ rabbit_misc:protocol_error(Type, Reason, ReasonArgs)
+ end.
+
+%% Skip the queue stat lookup entirely when the caller asked for nowait;
+%% report zero message/consumer counts in that case.
+maybe_stat(true, _Q) -> {ok, 0, 0};
+maybe_stat(false, Q) -> rabbit_amqqueue:stat(Q).
+
+%% Index ConsumerTag under its queue's name in queue_consumers so the
+%% tag can be recovered or cancelled when that queue goes down.
+consumer_monitor(ConsumerTag,
+ State = #ch{consumer_mapping = ConsumerMapping,
+ queue_consumers = QCons}) ->
+ {Queue, _Params} = maps:get(ConsumerTag, ConsumerMapping),
+ QName = amqqueue:get_name(Queue),
+ AddTag = fun (CTags) -> gb_sets:insert(ConsumerTag, CTags) end,
+ QCons1 = maps:update_with(QName, AddTag,
+ gb_sets:singleton(ConsumerTag), QCons),
+ State#ch{queue_consumers = QCons1}.
+
+%% Called when a queue this channel consumes from goes down or reaches
+%% end-of-life. For each consumer on that queue either cancel it or try
+%% to re-establish it (see queue_down_consumer_action); a failed
+%% re-consume falls back to cancellation. The queue's entry is removed
+%% from queue_consumers up front; successful re-consumes re-add it.
+handle_consuming_queue_down_or_eol(QName,
+ State = #ch{queue_consumers = QCons}) ->
+ ConsumerTags = case maps:find(QName, QCons) of
+ error -> gb_sets:new();
+ {ok, CTags} -> CTags
+ end,
+ gb_sets:fold(
+ fun (CTag, StateN = #ch{consumer_mapping = CMap}) ->
+ case queue_down_consumer_action(CTag, CMap) of
+ remove ->
+ cancel_consumer(CTag, QName, StateN);
+ {recover, {NoAck, ConsumerPrefetch, Exclusive, Args}} ->
+ %% 'catch' so that a crash inside basic_consume
+ %% degrades to cancelling the consumer. See [0].
+ case catch basic_consume(
+ QName, NoAck, ConsumerPrefetch, CTag,
+ Exclusive, Args, true, StateN) of
+ {ok, StateN1} ->
+ StateN1;
+ _Err ->
+ cancel_consumer(CTag, QName, StateN)
+ end
+ end
+ end, State#ch{queue_consumers = maps:remove(QName, QCons)}, ConsumerTags).
+
+%% [0] There is a slight danger here that if a queue is deleted and
+%% then recreated again the reconsume will succeed even though it was
+%% not an HA failover. But the likelihood is not great and most users
+%% are unlikely to care.
+
+%% Remove CTag from the consumer mapping, notifying the client with an
+%% unsolicited basic.cancel if it advertised the consumer_cancel_notify
+%% capability, and emit a consumer_deleted event for the management layer.
+cancel_consumer(CTag, QName,
+ State = #ch{cfg = #conf{capabilities = Capabilities},
+ consumer_mapping = CMap}) ->
+ case rabbit_misc:table_lookup(
+ Capabilities, <<"consumer_cancel_notify">>) of
+ {bool, true} -> ok = send(#'basic.cancel'{consumer_tag = CTag,
+ nowait = true}, State);
+ _ -> ok
+ end,
+ rabbit_event:notify(consumer_deleted, [{consumer_tag, CTag},
+ {channel, self()},
+ {queue, QName}]),
+ State#ch{consumer_mapping = maps:remove(CTag, CMap)}.
+
+%% Decide what to do with a consumer whose queue went down: consumers
+%% declared with x-cancel-on-ha-failover are removed; any other consumer
+%% is re-established using its original consume parameters.
+queue_down_consumer_action(CTag, CMap) ->
+ {_Queue, ConsumeSpec} = maps:get(CTag, CMap),
+ {_NoAck, _Prefetch, _Exclusive, Args} = ConsumeSpec,
+ case rabbit_misc:table_lookup(Args, <<"x-cancel-on-ha-failover">>) of
+ {bool, true} -> remove;
+ _Other -> {recover, ConsumeSpec}
+ end.
+
+%% Common implementation of {queue,exchange}.{bind,unbind}. Performs the
+%% permission checks (write on destination, read on source exchange,
+%% topic authorisation when the exchange exists), then applies Fun
+%% (rabbit_binding:add/remove) with an inner check that exclusive queues
+%% belong to this connection. Translates the various error tuples into
+%% protocol errors.
+binding_action(Fun, SourceNameBin0, DestinationType, DestinationNameBin0,
+ RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext,
+ #user{username = Username} = User) ->
+ ExchangeNameBin = strip_cr_lf(SourceNameBin0),
+ DestinationNameBin = strip_cr_lf(DestinationNameBin0),
+ DestinationName = name_to_resource(DestinationType, DestinationNameBin, VHostPath),
+ check_write_permitted(DestinationName, User, AuthzContext),
+ ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
+ [check_not_default_exchange(N) || N <- [DestinationName, ExchangeName]],
+ check_read_permitted(ExchangeName, User, AuthzContext),
+ case rabbit_exchange:lookup(ExchangeName) of
+ {error, not_found} ->
+ %% Missing source is reported later by Fun as resources_missing.
+ ok;
+ {ok, Exchange} ->
+ check_read_permitted_on_topic(Exchange, User, RoutingKey, AuthzContext)
+ end,
+ case Fun(#binding{source = ExchangeName,
+ destination = DestinationName,
+ key = RoutingKey,
+ args = Arguments},
+ fun (_X, Q) when ?is_amqqueue(Q) ->
+ try rabbit_amqqueue:check_exclusive_access(Q, ConnPid)
+ catch exit:Reason -> {error, Reason}
+ end;
+ (_X, #exchange{}) ->
+ ok
+ end,
+ Username) of
+ {error, {resources_missing, [{not_found, Name} | _]}} ->
+ rabbit_amqqueue:not_found(Name);
+ {error, {resources_missing, [{absent, Q, Reason} | _]}} ->
+ rabbit_amqqueue:absent(Q, Reason);
+ {error, binding_not_found} ->
+ rabbit_misc:protocol_error(
+ not_found, "no binding ~s between ~s and ~s",
+ [RoutingKey, rabbit_misc:rs(ExchangeName),
+ rabbit_misc:rs(DestinationName)]);
+ {error, {binding_invalid, Fmt, Args}} ->
+ rabbit_misc:protocol_error(precondition_failed, Fmt, Args);
+ {error, #amqp_error{} = Error} ->
+ rabbit_misc:protocol_error(Error);
+ ok ->
+ ok
+ end.
+
+%% Send a basic.return for an unroutable message back to the publisher,
+%% mapping Reason (e.g. no_route) to the protocol reply code/text. Only
+%% the first routing key is reported, per the AMQP frame format.
+basic_return(#basic_message{exchange_name = ExchangeName,
+ routing_keys = [RoutingKey | _CcRoutes],
+ content = Content},
+ State = #ch{cfg = #conf{protocol = Protocol,
+ writer_pid = WriterPid}},
+ Reason) ->
+ ?INCR_STATS(exchange_stats, ExchangeName, 1, return_unroutable, State),
+ {_Close, ReplyCode, ReplyText} = Protocol:lookup_amqp_exception(Reason),
+ ok = rabbit_writer:send_command(
+ WriterPid,
+ #'basic.return'{reply_code = ReplyCode,
+ reply_text = ReplyText,
+ exchange = ExchangeName#resource.name,
+ routing_key = RoutingKey},
+ Content).
+
+%% Reject the delivery(ies) identified by DeliveryTag (plus all older
+%% ones when Multiple). Outside a transaction the reject is applied
+%% immediately; inside one it is buffered on the tx ack list, tagged with
+%% the Requeue flag, to be applied at tx.commit.
+reject(DeliveryTag, Requeue, Multiple,
+ State = #ch{unacked_message_q = UAMQ, tx = Tx}) ->
+ {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple),
+ State1 = State#ch{unacked_message_q = Remaining},
+ {noreply, case Tx of
+ none ->
+ {State2, Actions} = internal_reject(Requeue, Acked, State1#ch.limiter, State1),
+ handle_queue_actions(Actions, State2);
+ {Msgs, Acks} ->
+ Acks1 = ack_cons(Requeue, Acked, Acks),
+ State1#ch{tx = {Msgs, Acks1}}
+ end}.
+
+%% NB: Acked is in youngest-first order
+%% Settle the rejected deliveries with each queue (discard, or requeue
+%% when Requeue is true), collect any resulting queue actions, and credit
+%% the limiter for the settled consumer deliveries.
+internal_reject(Requeue, Acked, Limiter,
+ State = #ch{queue_states = QueueStates0}) ->
+ {QueueStates, Actions} =
+ foreach_per_queue(
+ fun({QRef, CTag}, MsgIds, {Acc0, Actions0}) ->
+ Op = case Requeue of
+ false -> discard;
+ true -> requeue
+ end,
+ case rabbit_queue_type:settle(QRef, Op, CTag, MsgIds, Acc0) of
+ {ok, Acc, Actions} ->
+ {Acc, Actions0 ++ Actions};
+ {protocol_error, ErrorType, Reason, ReasonArgs} ->
+ rabbit_misc:protocol_error(ErrorType, Reason, ReasonArgs)
+ end
+ end, Acked, {QueueStates0, []}),
+ ok = notify_limiter(Limiter, Acked),
+ {State#ch{queue_states = QueueStates}, Actions}.
+
+%% Book-keeping for every message sent to the client (Type is 'get' or
+%% 'deliver'): bump the per-queue stats, trace the delivery, and — when
+%% the client must ack — append a #pending_ack to the unacked queue.
+%% Always advances next_tag by one.
+record_sent(Type, Tag, AckRequired,
+ Msg = {QName, _QPid, MsgId, Redelivered, _Message},
+ State = #ch{cfg = #conf{channel = ChannelNum,
+ trace_state = TraceState,
+ user = #user{username = Username},
+ conn_name = ConnName
+ },
+ unacked_message_q = UAMQ,
+ next_tag = DeliveryTag
+ }) ->
+ ?INCR_STATS(queue_stats, QName, 1, case {Type, AckRequired} of
+ {get, true} -> get;
+ {get, false} -> get_no_ack;
+ {deliver, true} -> deliver;
+ {deliver, false} -> deliver_no_ack
+ end, State),
+ case Redelivered of
+ true -> ?INCR_STATS(queue_stats, QName, 1, redeliver, State);
+ false -> ok
+ end,
+ DeliveredAt = os:system_time(millisecond),
+ rabbit_trace:tap_out(Msg, ConnName, ChannelNum, Username, TraceState),
+ UAMQ1 = case AckRequired of
+ true ->
+ ?QUEUE:in(#pending_ack{delivery_tag = DeliveryTag,
+ tag = Tag,
+ delivered_at = DeliveredAt,
+ queue = QName,
+ msg_id = MsgId}, UAMQ),
+ false ->
+ UAMQ
+ end,
+ State#ch{unacked_message_q = UAMQ1, next_tag = DeliveryTag + 1}.
+
+%% NB: returns acks in youngest-first order
+%% Split the unacked queue at DeliveryTag: returns {Acked, Remaining}.
+%% Tag 0 with multiple=true means "everything outstanding". With
+%% multiple=true all entries up to and including the tag are taken; with
+%% multiple=false only the exact tag (entries skipped over are kept, in
+%% order, via PrefixAcc). An unknown tag is a precondition_failed error.
+collect_acks(Q, 0, true) ->
+ {lists:reverse(?QUEUE:to_list(Q)), ?QUEUE:new()};
+collect_acks(Q, DeliveryTag, Multiple) ->
+ collect_acks([], [], Q, DeliveryTag, Multiple).
+
+collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) ->
+ case ?QUEUE:out(Q) of
+ {{value, UnackedMsg = #pending_ack{delivery_tag = CurrentDeliveryTag}},
+ QTail} ->
+ if CurrentDeliveryTag == DeliveryTag ->
+ {[UnackedMsg | ToAcc],
+ case PrefixAcc of
+ [] -> QTail;
+ _ -> ?QUEUE:join(
+ ?QUEUE:from_list(lists:reverse(PrefixAcc)),
+ QTail)
+ end};
+ Multiple ->
+ collect_acks([UnackedMsg | ToAcc], PrefixAcc,
+ QTail, DeliveryTag, Multiple);
+ true ->
+ collect_acks(ToAcc, [UnackedMsg | PrefixAcc],
+ QTail, DeliveryTag, Multiple)
+ end;
+ {empty, _} ->
+ precondition_failed("unknown delivery tag ~w", [DeliveryTag])
+ end.
+
+%% NB: Acked is in youngest-first order
+%% Settle (positively acknowledge) the given #pending_ack entries with
+%% their queues, grouped per (queue, consumer tag); bumps ack stats and
+%% credits the limiter. Returns {State, QueueActions}.
+ack(Acked, State = #ch{queue_states = QueueStates0}) ->
+ {QueueStates, Actions} =
+ foreach_per_queue(
+ fun ({QRef, CTag}, MsgIds, {Acc0, ActionsAcc0}) ->
+ case rabbit_queue_type:settle(QRef, complete, CTag,
+ MsgIds, Acc0) of
+ {ok, Acc, ActionsAcc} ->
+ incr_queue_stats(QRef, MsgIds, State),
+ {Acc, ActionsAcc0 ++ ActionsAcc};
+ {protocol_error, ErrorType, Reason, ReasonArgs} ->
+ rabbit_misc:protocol_error(ErrorType, Reason, ReasonArgs)
+ end
+ end, Acked, {QueueStates0, []}),
+ ok = notify_limiter(State#ch.limiter, Acked),
+ {State#ch{queue_states = QueueStates}, Actions}.
+
+%% Bump the per-queue 'ack' counter by the number of settled message ids.
+incr_queue_stats(QName, MsgIds, State) ->
+ Count = length(MsgIds),
+ ?INCR_STATS(queue_stats, QName, Count, ack, State).
+
+%% {Msgs, Acks}
+%%
+%% Msgs is a queue.
+%%
+%% Acks looks s.t. like this:
+%% [{false,[5,4]},{true,[3]},{ack,[2,1]}, ...]
+%%
+%% Each element is a pair consisting of a tag and a list of
+%% ack'ed/reject'ed msg ids. The tag is one of 'ack' (to ack), 'true'
+%% (reject w requeue), 'false' (reject w/o requeue). The msg ids, as
+%% well as the list overall, are in "most-recent (generally youngest)
+%% ack first" order.
+%%
+%% Fresh, empty transaction state in the shape described above.
+new_tx() -> {?QUEUE:new(), []}.
+
+%% On channel shutdown: tell all classic queues this channel consumes
+%% from that it is going down, and mark the channel state as closing.
+%% Idempotent — a channel already closing is left untouched.
+notify_queues(State = #ch{cfg = #conf{state = closing}}) ->
+ {ok, State};
+notify_queues(State = #ch{consumer_mapping = Consumers,
+ cfg = Cfg}) ->
+ QPids = classic_consumer_queue_pids(Consumers),
+ Timeout = get_operation_timeout(),
+ {rabbit_amqqueue:notify_down_all(QPids, self(), Timeout),
+ State#ch{cfg = Cfg#conf{state = closing}}}.
+
+%% Apply F({QName, CTag}, MsgIds, Acc) once per (queue, consumer tag)
+%% pair found in the list of #pending_ack entries. The single-entry case
+%% is special-cased to avoid building a gb_tree; larger lists are grouped
+%% through a gb_tree keyed on {QName, CTag}.
+foreach_per_queue(_F, [], Acc) ->
+ Acc;
+foreach_per_queue(F, [#pending_ack{tag = CTag,
+ queue = QName,
+ msg_id = MsgId}], Acc) ->
+ %% quorum queue, needs the consumer tag
+ F({QName, CTag}, [MsgId], Acc);
+foreach_per_queue(F, UAL, Acc) ->
+ T = lists:foldl(fun (#pending_ack{tag = CTag,
+ queue = QName,
+ msg_id = MsgId}, T) ->
+ rabbit_misc:gb_trees_cons({QName, CTag}, MsgId, T)
+ end, gb_trees:empty(), UAL),
+ rabbit_misc:gb_trees_fold(fun (Key, Val, Acc0) -> F(Key, Val, Acc0) end, Acc, T).
+
+%% hack to patch up missing queue type behaviour for classic queue
+%% Distinct pids of the classic queues this channel consumes from; other
+%% queue types are not addressed by pid and are filtered out.
+classic_consumer_queue_pids(Consumers) ->
+ Pids = [amqqueue:get_pid(Q)
+ || {Q, _CParams} <- maps:values(Consumers),
+ amqqueue:get_type(Q) == rabbit_classic_queue],
+ lists:usort(Pids).
+
+%% tell the limiter about the number of acks that have been received
+%% for messages delivered to subscribed consumers, but not acks for
+%% messages sent in a response to a basic.get (identified by their
+%% consumer tag as an integer (the same as the delivery tag, required
+%% quorum queues))
+%%
+%% BUGFIX: Acked holds #pending_ack records (see record_sent and
+%% collect_acks), not 4-tuples; the previous fun clauses matched
+%% {_, CTag, _, _} and would crash with function_clause whenever the
+%% limiter was active. Match on the record's 'tag' field instead.
+notify_limiter(Limiter, Acked) ->
+ %% optimisation: avoid the potentially expensive 'foldl' in the
+ %% common case.
+ case rabbit_limiter:is_active(Limiter) of
+ false -> ok;
+ true -> case lists:foldl(fun (#pending_ack{tag = CTag}, Acc) when is_integer(CTag) ->
+ %% Quorum queues use integer CTags
+ %% classic queues use binaries
+ %% Quorum queues do not interact
+ %% with limiters
+ Acc;
+ (_, Acc) -> Acc + 1
+ end, 0, Acked) of
+ 0 -> ok;
+ Count -> rabbit_limiter:ack(Limiter, Count)
+ end
+ end.
+
+%% Hand a publish to its routed queues via the queue-type layer. The
+%% first clause short-circuits the common unroutable/non-mandatory/
+%% non-confirm case. Ordering matters: basic.return (mandatory) must be
+%% sent before confirms, and queue actions are processed only after
+%% confirms are registered since actions may reject the publish.
+deliver_to_queues({#delivery{message = #basic_message{exchange_name = XName},
+ confirm = false,
+ mandatory = false},
+ _RoutedToQs = []}, State) -> %% optimisation
+ ?INCR_STATS(exchange_stats, XName, 1, publish, State),
+ ?INCR_STATS(exchange_stats, XName, 1, drop_unroutable, State),
+ State;
+deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{
+ exchange_name = XName},
+ mandatory = Mandatory,
+ confirm = Confirm,
+ msg_seq_no = MsgSeqNo},
+ DelQNames}, State0 = #ch{queue_states = QueueStates0}) ->
+ Qs = rabbit_amqqueue:lookup(DelQNames),
+ AllQueueNames = lists:foldl(fun (Q, Acc) ->
+ QRef = amqqueue:get_name(Q),
+ [QRef | Acc]
+ end, [], Qs),
+ {ok, QueueStates, Actions} =
+ rabbit_queue_type:deliver(Qs, Delivery, QueueStates0),
+ %% NB: the order here is important since basic.returns must be
+ %% sent before confirms.
+ ok = process_routing_mandatory(Mandatory, Qs, Message, State0),
+ State1 = process_routing_confirm(Confirm, AllQueueNames,
+ MsgSeqNo, XName, State0),
+ %% Actions must be processed after registering confirms as actions may
+ %% contain rejections of publishes
+ State = handle_queue_actions(Actions,
+ State1#ch{queue_states = QueueStates}),
+ %% Fine-grained stats only when the stats level warrants the cost.
+ case rabbit_event:stats_level(State, #ch.stats_timer) of
+ fine ->
+ ?INCR_STATS(exchange_stats, XName, 1, publish),
+ [?INCR_STATS(queue_exchange_stats,
+ {amqqueue:get_name(Q), XName}, 1, publish)
+ || Q <- Qs];
+ _ ->
+ ok
+ end,
+ State.
+
+%% Handle the mandatory flag for a publish that routed to no queue:
+%% mandatory publishes are returned to the client (basic.return),
+%% non-mandatory ones are counted as dropped. Routed publishes: no-op.
+process_routing_mandatory(_Mandatory = true,
+ _RoutedToQs = [],
+ Msg, State) ->
+ ok = basic_return(Msg, State, no_route),
+ ok;
+process_routing_mandatory(_Mandatory = false,
+ _RoutedToQs = [],
+ #basic_message{exchange_name = ExchangeName}, State) ->
+ ?INCR_STATS(exchange_stats, ExchangeName, 1, drop_unroutable, State),
+ ok;
+process_routing_mandatory(_, _, _, _) ->
+ ok.
+
+%% Register a publish for confirm tracking. Confirms disabled: no-op.
+%% Routed to no queue: confirm immediately. Otherwise record the
+%% outstanding queue refs; the confirm is sent once all have settled.
+process_routing_confirm(false, _, _, _, State) ->
+ State;
+process_routing_confirm(true, [], MsgSeqNo, XName, State) ->
+ record_confirms([{MsgSeqNo, XName}], State);
+process_routing_confirm(true, QRefs, MsgSeqNo, XName, State) ->
+ State#ch{unconfirmed =
+ rabbit_confirms:insert(MsgSeqNo, QRefs, XName, State#ch.unconfirmed)}.
+
+%% A queue (QRef) has settled MsgSeqNos: mark them confirmed in the
+%% unconfirmed tracker and record any sequence numbers that became fully
+%% confirmed (i.e. settled by all their queues).
+confirm(MsgSeqNos, QRef, State = #ch{unconfirmed = UC}) ->
+ %% NOTE: if queue name does not exist here it's likely that the ref also
+ %% does not exist in unconfirmed messages.
+ %% Neither does the 'ignore' atom, so it's a reasonable fallback.
+ {ConfirmMXs, UC1} = rabbit_confirms:confirm(MsgSeqNos, QRef, UC),
+ %% NB: don't call noreply/1 since we don't want to send confirms.
+ record_confirms(ConfirmMXs, State#ch{unconfirmed = UC1}).
+
+%% Flush accumulated confirms and nacks to the client (non-transactional
+%% channels only; transactional channels instead try to complete the tx).
+%% Suppressed while the node is pausing during a partition, to avoid
+%% sending confirms that may not hold after the partition heals.
+send_confirms_and_nacks(State = #ch{tx = none, confirmed = [], rejected = []}) ->
+ State;
+send_confirms_and_nacks(State = #ch{tx = none, confirmed = C, rejected = R}) ->
+ case rabbit_node_monitor:pause_partition_guard() of
+ ok ->
+ Confirms = lists:append(C),
+ Rejects = lists:append(R),
+ ConfirmMsgSeqNos =
+ lists:foldl(
+ fun ({MsgSeqNo, XName}, MSNs) ->
+ ?INCR_STATS(exchange_stats, XName, 1, confirm, State),
+ [MsgSeqNo | MSNs]
+ end, [], Confirms),
+ RejectMsgSeqNos = [MsgSeqNo || {MsgSeqNo, _} <- Rejects],
+
+ State1 = send_confirms(ConfirmMsgSeqNos,
+ RejectMsgSeqNos,
+ State#ch{confirmed = []}),
+ %% TODO: msg seq nos, same as for confirms. Need to implement
+ %% nack rates first.
+ send_nacks(RejectMsgSeqNos,
+ ConfirmMsgSeqNos,
+ State1#ch{rejected = []});
+ pausing -> State
+ end;
+send_confirms_and_nacks(State) ->
+ case rabbit_node_monitor:pause_partition_guard() of
+ ok -> maybe_complete_tx(State);
+ pausing -> State
+ end.
+
+%% Send basic.nack frames for the rejected sequence numbers, coalescing
+%% runs into multiple=true frames; the confirmed set bounds coalescing.
+send_nacks([], _, State) ->
+ State;
+send_nacks(_Rs, _, State = #ch{cfg = #conf{state = closing}}) -> %% optimisation
+ State;
+send_nacks(Rs, Cs, State) ->
+ coalesce_and_send(Rs, Cs,
+ fun(MsgSeqNo, Multiple) ->
+ #'basic.nack'{delivery_tag = MsgSeqNo,
+ multiple = Multiple}
+ end, State).
+
+%% Send basic.ack confirms, with a fast path for a single sequence number
+%% and coalescing (multiple=true) for larger batches; the rejected set
+%% bounds coalescing so a nack is never covered by a multiple ack.
+send_confirms([], _, State) ->
+ State;
+send_confirms(_Cs, _, State = #ch{cfg = #conf{state = closing}}) -> %% optimisation
+ State;
+send_confirms([MsgSeqNo], _, State) ->
+ ok = send(#'basic.ack'{delivery_tag = MsgSeqNo}, State),
+ State;
+send_confirms(Cs, Rs, State) ->
+ coalesce_and_send(Cs, Rs,
+ fun(MsgSeqNo, Multiple) ->
+ #'basic.ack'{delivery_tag = MsgSeqNo,
+ multiple = Multiple}
+ end, State).
+
+%% Emit acks/nacks for MsgSeqNos, collapsing every sequence number below
+%% a safe cutoff into one multiple=true frame. The cutoff is the smallest
+%% still-unconfirmed seq no (or one past the largest if none are pending),
+%% further bounded by the opposite-polarity set so a multiple frame never
+%% covers a seq no that must be signalled the other way.
+coalesce_and_send(MsgSeqNos, NegativeMsgSeqNos, MkMsgFun, State = #ch{unconfirmed = UC}) ->
+ SMsgSeqNos = lists:usort(MsgSeqNos),
+ UnconfirmedCutoff = case rabbit_confirms:is_empty(UC) of
+ true -> lists:last(SMsgSeqNos) + 1;
+ false -> rabbit_confirms:smallest(UC)
+ end,
+ Cutoff = lists:min([UnconfirmedCutoff | NegativeMsgSeqNos]),
+ {Ms, Ss} = lists:splitwith(fun(X) -> X < Cutoff end, SMsgSeqNos),
+ case Ms of
+ [] -> ok;
+ _ -> ok = send(MkMsgFun(lists:last(Ms), true), State)
+ end,
+ [ok = send(MkMsgFun(SeqNo, false), State) || SeqNo <- Ss],
+ State.
+
+%% Prepend Acked onto the tx ack list, merging into the head entry when
+%% it already carries the same tag ('ack' | 'true' | 'false').
+ack_cons(Tag, Acked, Pending) ->
+ case Pending of
+ [{Tag, Previous} | Rest] -> [{Tag, Acked ++ Previous} | Rest];
+ _ -> [{Tag, Acked} | Pending]
+ end.
+
+%% Total number of msg ids awaiting positive ack in a tx ack list;
+%% reject entries (tagged 'true'/'false') are not counted.
+ack_len(Acks) ->
+ lists:foldl(fun ({ack, MsgIds}, Total) -> Total + length(MsgIds);
+ (_Other, Total) -> Total
+ end, 0, Acks).
+
+%% Finish a committing transaction once every publish confirm has come
+%% back from the queues; a tx still holding {Msgs, Acks} (i.e. not in the
+%% 'committing' phase) is left alone.
+maybe_complete_tx(State = #ch{tx = {_, _}}) ->
+ State;
+maybe_complete_tx(State = #ch{unconfirmed = UC}) ->
+ case rabbit_confirms:is_empty(UC) of
+ false -> State;
+ true -> complete_tx(State#ch{confirmed = []})
+ end.
+
+%% Terminate the committing transaction: send tx.commit_ok on success,
+%% or raise a channel exception when some queue rejected part of the tx
+%% ('failed'). Either way the channel gets a fresh, empty tx.
+complete_tx(State = #ch{tx = committing}) ->
+ ok = send(#'tx.commit_ok'{}, State),
+ State#ch{tx = new_tx()};
+complete_tx(State = #ch{tx = failed}) ->
+ {noreply, State1} = handle_exception(
+ rabbit_misc:amqp_error(
+ precondition_failed, "partial tx completion", [],
+ 'tx.commit'),
+ State),
+ State1#ch{tx = new_tx()}.
+
+%% Collect {Item, Value} pairs for the requested info items (see i/2).
+infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items].
+
+%% As infos/2 but aborts with throw(timeout) once Deadline (a wall-clock
+%% millisecond timestamp) has passed; the deadline is re-checked before
+%% each item is computed.
+infos(Items, Deadline, State) ->
+ Collect = fun (Item) ->
+ case now_millis() > Deadline of
+ true -> throw(timeout);
+ false -> {Item, i(Item, State)}
+ end
+ end,
+ [Collect(Item) || Item <- Items].
+
+%% Single-item channel introspection used by infos/2,3 and the stats
+%% emitter: maps an info key atom to its current value. Unknown keys
+%% raise {bad_argument, Item}.
+i(pid, _) -> self();
+i(connection, #ch{cfg = #conf{conn_pid = ConnPid}}) -> ConnPid;
+i(number, #ch{cfg = #conf{channel = Channel}}) -> Channel;
+i(user, #ch{cfg = #conf{user = User}}) -> User#user.username;
+i(user_who_performed_action, Ch) -> i(user, Ch);
+i(vhost, #ch{cfg = #conf{virtual_host = VHost}}) -> VHost;
+i(transactional, #ch{tx = Tx}) -> Tx =/= none;
+i(confirm, #ch{confirm_enabled = CE}) -> CE;
+i(name, State) -> name(State);
+i(consumer_count, #ch{consumer_mapping = CM}) -> maps:size(CM);
+i(messages_unconfirmed, #ch{unconfirmed = UC}) -> rabbit_confirms:size(UC);
+i(messages_unacknowledged, #ch{unacked_message_q = UAMQ}) -> ?QUEUE:len(UAMQ);
+%% The *_uncommitted keys fall back to 0 on non-transactional channels.
+i(messages_uncommitted, #ch{tx = {Msgs, _Acks}}) -> ?QUEUE:len(Msgs);
+i(messages_uncommitted, #ch{}) -> 0;
+i(acks_uncommitted, #ch{tx = {_Msgs, Acks}}) -> ack_len(Acks);
+i(acks_uncommitted, #ch{}) -> 0;
+i(pending_raft_commands, #ch{queue_states = QS}) ->
+ pending_raft_commands(QS);
+%% While running, report the credit-flow state (e.g. running/flow).
+i(state, #ch{cfg = #conf{state = running}}) -> credit_flow:state();
+i(state, #ch{cfg = #conf{state = State}}) -> State;
+i(prefetch_count, #ch{cfg = #conf{consumer_prefetch = C}}) -> C;
+i(global_prefetch_count, #ch{limiter = Limiter}) ->
+ rabbit_limiter:get_prefetch_limit(Limiter);
+i(interceptors, #ch{interceptor_state = IState}) ->
+ IState;
+i(garbage_collection, _State) ->
+ rabbit_misc:get_gc_info(self());
+i(reductions, _State) ->
+ {reductions, Reductions} = erlang:process_info(self(), reductions),
+ Reductions;
+i(Item, _) ->
+ throw({bad_argument, Item}).
+
+%% Sum the pending_raft_commands counter over all queue states; states
+%% whose state_info/1 map lacks the key contribute nothing.
+pending_raft_commands(QStates) ->
+    rabbit_queue_type:fold_state(
+      fun (_Name, QState, Total) ->
+              case rabbit_queue_type:state_info(QState) of
+                  #{pending_raft_commands := N} -> Total + N;
+                  _ -> Total
+              end
+      end, 0, QStates).
+
+%% Human-readable channel name: "<connection name> (<channel number>)".
+name(#ch{cfg = #conf{conn_name = ConnName, channel = Channel}}) ->
+ list_to_binary(rabbit_misc:format("~s (~p)", [ConnName, Channel])).
+
+%% Emit channel statistics with no extra metrics.
+emit_stats(State) -> emit_stats(State, []).
+
+%% Push coarse channel stats plus the reductions counter into the core
+%% metrics store. The reductions entry (head of ?STATISTICS_KEYS) is
+%% split off and reported separately.
+emit_stats(State, Extra) ->
+ [{reductions, Red} | Coarse0] = infos(?STATISTICS_KEYS, State),
+ %% First metric must be `idle_since` (if available), as expected by
+ %% `rabbit_mgmt_format:format_channel_stats`. This is a performance
+ %% optimisation that avoids traversing the whole list when only
+ %% one element has to be formatted.
+ rabbit_core_metrics:channel_stats(self(), Extra ++ Coarse0),
+ rabbit_core_metrics:channel_stats(reductions, self(), Red).
+
+%% Drop all per-queue stats for QName: notify core metrics of the
+%% queue's disappearance, erase the process-dictionary entry, and do
+%% the same for every {queue, exchange} stats pair involving QName.
+erase_queue_stats(QName) ->
+ rabbit_core_metrics:channel_queue_down({self(), QName}),
+ erase({queue_stats, QName}),
+ [begin
+ rabbit_core_metrics:channel_queue_exchange_down({self(), QX}),
+ erase({queue_exchange_stats, QX})
+ end || {{queue_exchange_stats, QX = {QName0, _}}, _} <- get(),
+ QName0 =:= QName].
+
+%% Virtual host this channel operates in.
+get_vhost(#ch{cfg = #conf{virtual_host = VHost}}) -> VHost.
+
+%% The #user{} record the channel was opened with.
+get_user(#ch{cfg = #conf{user = User}}) -> User.
+
+%% Notify core metrics that a stats key held in the process dictionary
+%% is going away; keys of any other shape are ignored.
+delete_stats({queue_stats, QName}) ->
+ rabbit_core_metrics:channel_queue_down({self(), QName});
+delete_stats({exchange_stats, XName}) ->
+ rabbit_core_metrics:channel_exchange_down({self(), XName});
+delete_stats({queue_exchange_stats, QX}) ->
+ rabbit_core_metrics:channel_queue_exchange_down({self(), QX});
+delete_stats(_) ->
+ ok.
+
+%% Stash the channel operation timeout in the process dictionary so
+%% that code running in this process can read it via
+%% get_operation_timeout/0.
+put_operation_timeout() ->
+ put(channel_operation_timeout, ?CHANNEL_OPERATION_TIMEOUT).
+
+%% Read the timeout stored by put_operation_timeout/0; 'undefined' if
+%% it was never stored in this process.
+get_operation_timeout() ->
+ get(channel_operation_timeout).
+
+%% Refactored and exported to allow direct calls from the HTTP API,
+%% avoiding the usage of AMQP 0-9-1 from the management.
+
+%% Execute an AMQP 0-9-1 method outside the normal channel flow (also
+%% called directly from the HTTP management API). Dispatches on the
+%% method record; each clause performs its own permission checks via
+%% the supplied User/AuthzContext and VHostPath.
+%%
+%% exchange.bind: add an exchange-to-exchange binding.
+handle_method(#'exchange.bind'{destination = DestinationNameBin,
+ source = SourceNameBin,
+ routing_key = RoutingKey,
+ arguments = Arguments},
+ ConnPid, AuthzContext, _CollectorId, VHostPath, User) ->
+ binding_action(fun rabbit_binding:add/3,
+ SourceNameBin, exchange, DestinationNameBin,
+ RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext, User);
+%% exchange.unbind: remove an exchange-to-exchange binding.
+handle_method(#'exchange.unbind'{destination = DestinationNameBin,
+ source = SourceNameBin,
+ routing_key = RoutingKey,
+ arguments = Arguments},
+ ConnPid, AuthzContext, _CollectorId, VHostPath, User) ->
+ binding_action(fun rabbit_binding:remove/3,
+ SourceNameBin, exchange, DestinationNameBin,
+ RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext, User);
+%% queue.unbind: remove an exchange-to-queue binding.
+handle_method(#'queue.unbind'{queue = QueueNameBin,
+ exchange = ExchangeNameBin,
+ routing_key = RoutingKey,
+ arguments = Arguments},
+ ConnPid, AuthzContext, _CollectorId, VHostPath, User) ->
+ binding_action(fun rabbit_binding:remove/3,
+ ExchangeNameBin, queue, QueueNameBin,
+ RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext, User);
+%% queue.bind: add an exchange-to-queue binding.
+handle_method(#'queue.bind'{queue = QueueNameBin,
+ exchange = ExchangeNameBin,
+ routing_key = RoutingKey,
+ arguments = Arguments},
+ ConnPid, AuthzContext, _CollectorId, VHostPath, User) ->
+ binding_action(fun rabbit_binding:add/3,
+ ExchangeNameBin, queue, QueueNameBin,
+ RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext, User);
+%% Note that all declares to these are effectively passive. If it
+%% exists it by definition has one consumer.
+handle_method(#'queue.declare'{queue = <<"amq.rabbitmq.reply-to",
+ _/binary>> = QueueNameBin},
+ _ConnPid, _AuthzContext, _CollectorPid, VHost, _User) ->
+ StrippedQueueNameBin = strip_cr_lf(QueueNameBin),
+ QueueName = rabbit_misc:r(VHost, queue, StrippedQueueNameBin),
+ case declare_fast_reply_to(StrippedQueueNameBin) of
+ exists -> {ok, QueueName, 0, 1};
+ not_found -> rabbit_amqqueue:not_found(QueueName)
+ end;
+%% queue.declare (active): assert equivalence against an existing queue
+%% or create a new one, enforcing vhost queue limits and dead-letter
+%% exchange permissions for new queues only.
+handle_method(#'queue.declare'{queue = QueueNameBin,
+ passive = false,
+ durable = DurableDeclare,
+ exclusive = ExclusiveDeclare,
+ auto_delete = AutoDelete,
+ nowait = NoWait,
+ arguments = Args} = Declare,
+ ConnPid, AuthzContext, CollectorPid, VHostPath,
+ #user{username = Username} = User) ->
+ Owner = case ExclusiveDeclare of
+ true -> ConnPid;
+ false -> none
+ end,
+ StrippedQueueNameBin = strip_cr_lf(QueueNameBin),
+ %% exclusive queues are never durable, whatever the client asked for
+ Durable = DurableDeclare andalso not ExclusiveDeclare,
+ ActualNameBin = case StrippedQueueNameBin of
+ <<>> ->
+ case rabbit_amqqueue:is_server_named_allowed(Args) of
+ true ->
+ rabbit_guid:binary(rabbit_guid:gen_secure(), "amq.gen");
+ false ->
+ rabbit_misc:protocol_error(
+ precondition_failed,
+ "Cannot declare a server-named queue for type ~p",
+ [rabbit_amqqueue:get_queue_type(Args)])
+ end;
+ Other -> check_name('queue', Other)
+ end,
+ QueueName = rabbit_misc:r(VHostPath, queue, ActualNameBin),
+ check_configure_permitted(QueueName, User, AuthzContext),
+ rabbit_core_metrics:queue_declared(QueueName),
+ case rabbit_amqqueue:with(
+ QueueName,
+ fun (Q) -> ok = rabbit_amqqueue:assert_equivalence(
+ Q, Durable, AutoDelete, Args, Owner),
+ maybe_stat(NoWait, Q)
+ end) of
+ {ok, MessageCount, ConsumerCount} ->
+ {ok, QueueName, MessageCount, ConsumerCount};
+ {error, not_found} ->
+ %% enforce the limit for newly declared queues only
+ check_vhost_queue_limit(QueueName, VHostPath),
+ DlxKey = <<"x-dead-letter-exchange">>,
+ case rabbit_misc:r_arg(VHostPath, exchange, Args, DlxKey) of
+ undefined ->
+ ok;
+ {error, {invalid_type, Type}} ->
+ precondition_failed(
+ "invalid type '~s' for arg '~s' in ~s",
+ [Type, DlxKey, rabbit_misc:rs(QueueName)]);
+ DLX ->
+ check_read_permitted(QueueName, User, AuthzContext),
+ check_write_permitted(DLX, User, AuthzContext),
+ ok
+ end,
+ case rabbit_amqqueue:declare(QueueName, Durable, AutoDelete,
+ Args, Owner, Username) of
+ {new, Q} when ?is_amqqueue(Q) ->
+ %% We need to notify the reader within the channel
+ %% process so that we can be sure there are no
+ %% outstanding exclusive queues being declared as
+ %% the connection shuts down.
+ QPid = amqqueue:get_pid(Q),
+ ok = case {Owner, CollectorPid} of
+ {none, _} -> ok;
+ {_, none} -> ok; %% Supports call from mgmt API
+ _ -> rabbit_queue_collector:register(
+ CollectorPid, QPid)
+ end,
+ rabbit_core_metrics:queue_created(QueueName),
+ {ok, QueueName, 0, 0};
+ {existing, _Q} ->
+ %% must have been created between the stat and the
+ %% declare. Loop around again.
+ handle_method(Declare, ConnPid, AuthzContext, CollectorPid, VHostPath,
+ User);
+ {absent, Q, Reason} ->
+ rabbit_amqqueue:absent(Q, Reason);
+ {owner_died, _Q} ->
+ %% Presumably our own days are numbered since the
+ %% connection has died. Pretend the queue exists though,
+ %% just so nothing fails.
+ {ok, QueueName, 0, 0};
+ {protocol_error, ErrorType, Reason, ReasonArgs} ->
+ rabbit_misc:protocol_error(ErrorType, Reason, ReasonArgs)
+ end;
+ {error, {absent, Q, Reason}} ->
+ rabbit_amqqueue:absent(Q, Reason)
+ end;
+%% queue.declare (passive): look the queue up, die if absent, and
+%% verify exclusive access for this connection.
+handle_method(#'queue.declare'{queue = QueueNameBin,
+ nowait = NoWait,
+ passive = true},
+ ConnPid, _AuthzContext, _CollectorPid, VHostPath, _User) ->
+ StrippedQueueNameBin = strip_cr_lf(QueueNameBin),
+ QueueName = rabbit_misc:r(VHostPath, queue, StrippedQueueNameBin),
+ Fun = fun (Q0) ->
+ QStat = maybe_stat(NoWait, Q0),
+ {QStat, Q0}
+ end,
+ %% Note: no need to check if Q is an #amqqueue, with_or_die does it
+ {{ok, MessageCount, ConsumerCount}, Q} = rabbit_amqqueue:with_or_die(QueueName, Fun),
+ ok = rabbit_amqqueue:check_exclusive_access(Q, ConnPid),
+ {ok, QueueName, MessageCount, ConsumerCount};
+%% queue.delete: delete the queue subject to if_unused/if_empty; a
+%% missing queue is treated as already deleted (returns {ok, 0}), and
+%% crashed/stopped queues are cleaned up via delete_crashed/2.
+handle_method(#'queue.delete'{queue = QueueNameBin,
+ if_unused = IfUnused,
+ if_empty = IfEmpty},
+ ConnPid, AuthzContext, _CollectorPid, VHostPath,
+ User = #user{username = Username}) ->
+ StrippedQueueNameBin = strip_cr_lf(QueueNameBin),
+ QueueName = qbin_to_resource(StrippedQueueNameBin, VHostPath),
+
+ check_configure_permitted(QueueName, User, AuthzContext),
+ case rabbit_amqqueue:with(
+ QueueName,
+ fun (Q) ->
+ rabbit_amqqueue:check_exclusive_access(Q, ConnPid),
+ rabbit_queue_type:delete(Q, IfUnused, IfEmpty, Username)
+ end,
+ fun (not_found) ->
+ {ok, 0};
+ ({absent, Q, crashed}) ->
+ _ = rabbit_classic_queue:delete_crashed(Q, Username),
+ {ok, 0};
+ ({absent, Q, stopped}) ->
+ _ = rabbit_classic_queue:delete_crashed(Q, Username),
+ {ok, 0};
+ ({absent, Q, Reason}) ->
+ rabbit_amqqueue:absent(Q, Reason)
+ end) of
+ {error, in_use} ->
+ precondition_failed("~s in use", [rabbit_misc:rs(QueueName)]);
+ {error, not_empty} ->
+ precondition_failed("~s not empty", [rabbit_misc:rs(QueueName)]);
+ {ok, Count} ->
+ {ok, Count};
+ {protocol_error, Type, Reason, ReasonArgs} ->
+ rabbit_misc:protocol_error(Type, Reason, ReasonArgs)
+ end;
+%% exchange.delete: delete an exchange subject to if_unused; deleting a
+%% non-existent exchange succeeds silently.
+handle_method(#'exchange.delete'{exchange = ExchangeNameBin,
+ if_unused = IfUnused},
+ _ConnPid, AuthzContext, _CollectorPid, VHostPath,
+ User = #user{username = Username}) ->
+ StrippedExchangeNameBin = strip_cr_lf(ExchangeNameBin),
+ ExchangeName = rabbit_misc:r(VHostPath, exchange, StrippedExchangeNameBin),
+ check_not_default_exchange(ExchangeName),
+ check_exchange_deletion(ExchangeName),
+ check_configure_permitted(ExchangeName, User, AuthzContext),
+ case rabbit_exchange:delete(ExchangeName, IfUnused, Username) of
+ {error, not_found} ->
+ ok;
+ {error, in_use} ->
+ precondition_failed("~s in use", [rabbit_misc:rs(ExchangeName)]);
+ ok ->
+ ok
+ end;
+%% queue.purge: drop all ready messages; not supported by every queue
+%% type (streams reject it with not_implemented).
+handle_method(#'queue.purge'{queue = QueueNameBin},
+ ConnPid, AuthzContext, _CollectorPid, VHostPath, User) ->
+ QueueName = qbin_to_resource(QueueNameBin, VHostPath),
+ check_read_permitted(QueueName, User, AuthzContext),
+ rabbit_amqqueue:with_exclusive_access_or_die(
+ QueueName, ConnPid,
+ fun (Q) ->
+ case rabbit_queue_type:purge(Q) of
+ {ok, _} = Res ->
+ Res;
+ {error, not_supported} ->
+ rabbit_misc:protocol_error(
+ not_implemented,
+ "queue.purge not supported by stream queues ~s",
+ [rabbit_misc:rs(amqqueue:get_name(Q))])
+ end
+ end);
+%% exchange.declare (active): create the exchange if missing (checking
+%% alternate-exchange permissions), then assert equivalence with the
+%% requested properties either way.
+handle_method(#'exchange.declare'{exchange = ExchangeNameBin,
+ type = TypeNameBin,
+ passive = false,
+ durable = Durable,
+ auto_delete = AutoDelete,
+ internal = Internal,
+ arguments = Args},
+ _ConnPid, AuthzContext, _CollectorPid, VHostPath,
+ #user{username = Username} = User) ->
+ CheckedType = rabbit_exchange:check_type(TypeNameBin),
+ ExchangeName = rabbit_misc:r(VHostPath, exchange, strip_cr_lf(ExchangeNameBin)),
+ check_not_default_exchange(ExchangeName),
+ check_configure_permitted(ExchangeName, User, AuthzContext),
+ X = case rabbit_exchange:lookup(ExchangeName) of
+ {ok, FoundX} -> FoundX;
+ {error, not_found} ->
+ check_name('exchange', strip_cr_lf(ExchangeNameBin)),
+ AeKey = <<"alternate-exchange">>,
+ case rabbit_misc:r_arg(VHostPath, exchange, Args, AeKey) of
+ undefined -> ok;
+ {error, {invalid_type, Type}} ->
+ precondition_failed(
+ "invalid type '~s' for arg '~s' in ~s",
+ [Type, AeKey, rabbit_misc:rs(ExchangeName)]);
+ AName -> check_read_permitted(ExchangeName, User, AuthzContext),
+ check_write_permitted(AName, User, AuthzContext),
+ ok
+ end,
+ rabbit_exchange:declare(ExchangeName,
+ CheckedType,
+ Durable,
+ AutoDelete,
+ Internal,
+ Args,
+ Username)
+ end,
+ ok = rabbit_exchange:assert_equivalence(X, CheckedType, Durable,
+ AutoDelete, Internal, Args);
+%% exchange.declare (passive): merely assert the exchange exists.
+handle_method(#'exchange.declare'{exchange = ExchangeNameBin,
+ passive = true},
+ _ConnPid, _AuthzContext, _CollectorPid, VHostPath, _User) ->
+ ExchangeName = rabbit_misc:r(VHostPath, exchange, strip_cr_lf(ExchangeNameBin)),
+ check_not_default_exchange(ExchangeName),
+ _ = rabbit_exchange:lookup_or_die(ExchangeName).
+
+%% Deliver one message or a batch of messages for the given consumer
+%% tag; Ack says whether client acknowledgement is expected.
+handle_deliver(CTag, Ack, Msgs, State) when is_list(Msgs) ->
+    lists:foldl(fun (Msg, Acc) -> handle_deliver0(CTag, Ack, Msg, Acc) end,
+                State, Msgs);
+handle_deliver(CTag, Ack, Msg, State) ->
+    %% backwards compatibility clause: a single, unwrapped message
+    handle_deliver0(CTag, Ack, Msg, State).
+
+%% Send a basic.deliver frame plus content for one message. Classic
+%% queues go through send_command_and_notify (which also notifies the
+%% queue process), other queue types use plain send_command. Large
+%% contents may trigger a GC when a writer_gc_threshold is configured.
+%% Finally the delivery is recorded for ack/stats tracking.
+handle_deliver0(ConsumerTag, AckRequired,
+ Msg = {QName, QPid, _MsgId, Redelivered,
+ #basic_message{exchange_name = ExchangeName,
+ routing_keys = [RoutingKey | _CcRoutes],
+ content = Content}},
+ State = #ch{cfg = #conf{writer_pid = WriterPid,
+ writer_gc_threshold = GCThreshold},
+ next_tag = DeliveryTag,
+ queue_states = Qs}) ->
+ Deliver = #'basic.deliver'{consumer_tag = ConsumerTag,
+ delivery_tag = DeliveryTag,
+ redelivered = Redelivered,
+ exchange = ExchangeName#resource.name,
+ routing_key = RoutingKey},
+ case rabbit_queue_type:module(QName, Qs) of
+ {ok, rabbit_classic_queue} ->
+ ok = rabbit_writer:send_command_and_notify(
+ WriterPid, QPid, self(), Deliver, Content);
+ _ ->
+ ok = rabbit_writer:send_command(WriterPid, Deliver, Content)
+ end,
+ case GCThreshold of
+ undefined -> ok;
+ _ -> rabbit_basic:maybe_gc_large_msg(Content, GCThreshold)
+ end,
+ record_sent(deliver, ConsumerTag, AckRequired, Msg, State).
+
+%% Reply to basic.get with basic.get_ok plus the message content, then
+%% record the delivery (tracked for acknowledgement unless NoAck).
+handle_basic_get(WriterPid, DeliveryTag, NoAck, MessageCount,
+ Msg = {_QName, _QPid, _MsgId, Redelivered,
+ #basic_message{exchange_name = ExchangeName,
+ routing_keys = [RoutingKey | _CcRoutes],
+ content = Content}}, State) ->
+ ok = rabbit_writer:send_command(
+ WriterPid,
+ #'basic.get_ok'{delivery_tag = DeliveryTag,
+ redelivered = Redelivered,
+ exchange = ExchangeName#resource.name,
+ routing_key = RoutingKey,
+ message_count = MessageCount},
+ Content),
+ {noreply, record_sent(get, DeliveryTag, not(NoAck), Msg, State)}.
+
+%% Arm the periodic 'tick' timer if it is not already armed; the
+%% interval comes from the rabbit/channel_tick_interval app env.
+init_tick_timer(State = #ch{tick_timer = undefined}) ->
+ {ok, Interval} = application:get_env(rabbit, channel_tick_interval),
+ State#ch{tick_timer = erlang:send_after(Interval, self(), tick)};
+init_tick_timer(State) ->
+ State.
+
+%% Forget the timer reference so init_tick_timer/1 will re-arm.
+%% NOTE(review): this does not cancel an outstanding timer — presumably
+%% it is called after the 'tick' message fired; confirm at call sites.
+reset_tick_timer(State) ->
+ State#ch{tick_timer = undefined}.
+
+%% Cancel the tick timer when it is armed and there are no unacked
+%% messages left to watch; otherwise leave it running.
+maybe_cancel_tick_timer(#ch{tick_timer = undefined} = State) ->
+ State;
+maybe_cancel_tick_timer(#ch{tick_timer = TRef,
+ unacked_message_q = UMQ} = State) ->
+ case ?QUEUE:len(UMQ) of
+ 0 ->
+ %% we can only cancel the tick timer if the unacked messages
+ %% queue is empty.
+ _ = erlang:cancel_timer(TRef),
+ State#ch{tick_timer = undefined};
+ _ ->
+ %% let the timer continue
+ State
+ end.
+
+%% Monotonic clock in milliseconds — suitable only for deadlines and
+%% durations, not wall-clock timestamps.
+now_millis() ->
+ erlang:monotonic_time(millisecond).
+
+%% {Timeout, AbsoluteDeadline} pair based on the compile-time channel
+%% operation timeout.
+get_operation_timeout_and_deadline() ->
+ % NB: can't use get_operation_timeout because
+ % this code may not be running via the channel Pid
+ Timeout = ?CHANNEL_OPERATION_TIMEOUT,
+ Deadline = now_millis() + Timeout,
+ {Timeout, Deadline}.
+
+%% Left fold over a ?QUEUE, front to back.
+queue_fold(Fun, Acc, Q0) ->
+    case ?QUEUE:out(Q0) of
+        {{value, Item}, Q1} -> queue_fold(Fun, Fun(Item, Acc), Q1);
+        {empty, _Q1}        -> Acc
+    end.
+
+%% Check the oldest unacked delivery against the configured consumer
+%% timeout; if it has been outstanding too long, log a warning and
+%% close the channel with precondition_failed. Only the head of the
+%% queue is inspected (oldest delivery first).
+%% NOTE(review): the #pending_ack.delivery_tag field is bound to a
+%% variable named ConsumerTag and logged as a consumer — confirm which
+%% the field actually holds.
+evaluate_consumer_timeout(State0 = #ch{cfg = #conf{channel = Channel,
+ consumer_timeout = Timeout},
+ unacked_message_q = UAMQ}) ->
+ Now = os:system_time(millisecond),
+ case ?QUEUE:peek(UAMQ) of
+ {value, #pending_ack{delivery_tag = ConsumerTag,
+ delivered_at = Time}}
+ when is_integer(Timeout)
+ andalso Time < Now - Timeout ->
+ rabbit_log_channel:warning("Consumer ~s on channel ~w has timed out "
+ "waiting on consumer acknowledgement. Timeout used: ~p ms",
+ [rabbit_data_coercion:to_binary(ConsumerTag),
+ Channel, Timeout]),
+ Ex = rabbit_misc:amqp_error(precondition_failed,
+ "consumer ack timed out on channel ~w",
+ [Channel], none),
+ handle_exception(Ex, State0);
+ _ ->
+ {noreply, State0}
+ end.
+
+%% Apply a list of queue-type actions to the channel state in order:
+%% credit replies and drain notifications go straight to the writer;
+%% settlements become publisher confirms; rejections move unconfirmed
+%% entries to the reject list; deliveries are forwarded to consumers;
+%% queue_down triggers consumer cleanup.
+handle_queue_actions(Actions, #ch{} = State0) ->
+ WriterPid = State0#ch.cfg#conf.writer_pid,
+ lists:foldl(
+ fun ({send_credit_reply, Avail}, S0) ->
+ ok = rabbit_writer:send_command(WriterPid,
+ #'basic.credit_ok'{available = Avail}),
+ S0;
+ ({send_drained, {CTag, Credit}}, S0) ->
+ ok = rabbit_writer:send_command(
+ WriterPid,
+ #'basic.credit_drained'{consumer_tag = CTag,
+ credit_drained = Credit}),
+ S0;
+ ({settled, QRef, MsgSeqNos}, S0) ->
+ confirm(MsgSeqNos, QRef, S0);
+ ({rejected, _QRef, MsgSeqNos}, S0) ->
+ %% foldr keeps the rejected entries in sequence order;
+ %% unknown sequence numbers are silently skipped
+ {U, Rej} =
+ lists:foldr(
+ fun(SeqNo, {U1, Acc}) ->
+ case rabbit_confirms:reject(SeqNo, U1) of
+ {ok, MX, U2} ->
+ {U2, [MX | Acc]};
+ {error, not_found} ->
+ {U1, Acc}
+ end
+ end, {S0#ch.unconfirmed, []}, MsgSeqNos),
+ S = S0#ch{unconfirmed = U},
+ record_rejects(Rej, S);
+ ({deliver, CTag, AckRequired, Msgs}, S0) ->
+ handle_deliver(CTag, AckRequired, Msgs, S0);
+ ({queue_down, QRef}, S0) ->
+ handle_consuming_queue_down_or_eol(QRef, S0)
+
+ end, State0, Actions).
+
+%% Scan the channel's queue states for the queue whose process pids
+%% include Pid; returns the queue name or 'undefined'. Once a match is
+%% found the second fun clause short-circuits further lookups.
+find_queue_name_from_pid(Pid, QStates) when is_pid(Pid) ->
+ Fun = fun(K, _V, undefined) ->
+ case rabbit_amqqueue:lookup(K) of
+ {error, not_found} ->
+ undefined;
+ {ok, Q} ->
+ Pids = get_queue_pids(Q),
+ case lists:member(Pid, Pids) of
+ true ->
+ K;
+ false ->
+ undefined
+ end
+ end;
+ (_K, _V, Acc) ->
+ Acc
+ end,
+ rabbit_queue_type:fold_state(Fun, undefined, QStates).
+
+%% All process identifiers associated with a queue: the leader for a
+%% quorum queue, or the master pid plus mirrors for other types.
+get_queue_pids(Q) when ?amqqueue_is_quorum(Q) ->
+ [amqqueue:get_leader(Q)];
+get_queue_pids(Q) ->
+ [amqqueue:get_pid(Q) | amqqueue:get_slave_pids(Q)].
+
+%% Find the queue resource name whose quorum (Ra) server name matches
+%% Name, scanning the channel's queue states; returns 'undefined' when
+%% nothing matches.
+%%
+%% Fix: the fold fun previously only had a clause matching an
+%% 'undefined' accumulator, so once a match was found any further
+%% iteration crashed with function_clause. Mirror the pass-through
+%% clause used by find_queue_name_from_pid/2.
+find_queue_name_from_quorum_name(Name, QStates) ->
+    Fun = fun(K, _V, undefined) ->
+                  {ok, Q} = rabbit_amqqueue:lookup(K),
+                  case amqqueue:get_pid(Q) of
+                      {Name, _} ->
+                          amqqueue:get_name(Q);
+                      _ ->
+                          undefined
+                  end;
+             (_K, _V, Acc) ->
+                  %% already found: keep the result, skip further lookups
+                  Acc
+          end,
+    rabbit_queue_type:fold_state(Fun, undefined, QStates).
diff --git a/deps/rabbit/src/rabbit_channel_interceptor.erl b/deps/rabbit/src/rabbit_channel_interceptor.erl
new file mode 100644
index 0000000000..c40b437f10
--- /dev/null
+++ b/deps/rabbit/src/rabbit_channel_interceptor.erl
@@ -0,0 +1,104 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_channel_interceptor).
+
+-include("rabbit_framing.hrl").
+-include("rabbit.hrl").
+
+-export([init/1, intercept_in/3]).
+
+-behaviour(rabbit_registry_class).
+
+-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).
+
+-type(method_name() :: rabbit_framing:amqp_method_name()).
+-type(original_method() :: rabbit_framing:amqp_method_record()).
+-type(processed_method() :: rabbit_framing:amqp_method_record()).
+-type(original_content() :: rabbit_types:maybe(rabbit_types:content())).
+-type(processed_content() :: rabbit_types:maybe(rabbit_types:content())).
+-type(interceptor_state() :: term()).
+
+-callback description() -> [proplists:property()].
+%% Derive some initial state from the channel. This will be passed back
+%% as the third argument of intercept/3.
+-callback init(rabbit_channel:channel()) -> interceptor_state().
+-callback intercept(original_method(), original_content(),
+ interceptor_state()) ->
+ {processed_method(), processed_content()} |
+ rabbit_misc:channel_or_connection_exit().
+-callback applies_to() -> list(method_name()).
+
+%% rabbit_registry_class callback: when an interceptor module is
+%% registered, have all channels rebuild their interceptor chains.
+added_to_rabbit_registry(_Type, _ModuleName) ->
+ rabbit_channel:refresh_interceptors().
+%% rabbit_registry_class callback: same refresh on unregistration.
+removed_from_rabbit_registry(_Type) ->
+ rabbit_channel:refresh_interceptors().
+
+%% Build the interceptor chain for a channel: look up every registered
+%% channel_interceptor module, assert that their applies_to/0 sets are
+%% pairwise disjoint, then pair each module with its init/1 state.
+init(Ch) ->
+ Mods = [M || {_, M} <- rabbit_registry:lookup_all(channel_interceptor)],
+ check_no_overlap(Mods),
+ [{Mod, Mod:init(Ch)} || Mod <- Mods].
+
+%% Fail unless every module's applies_to/0 method set is disjoint from
+%% all the others.
+check_no_overlap(Mods) ->
+ check_no_overlap1([sets:from_list(Mod:applies_to()) || Mod <- Mods]).
+
+%% Check that no two sets in the list intersect: fold a running union
+%% over the sets, raising an internal error if any set overlaps it.
+check_no_overlap1(Sets) ->
+    _ = lists:foldl(
+          fun(Set, UnionSoFar) ->
+                  Overlap = sets:intersection(Set, UnionSoFar),
+                  case sets:size(Overlap) of
+                      0 -> ok;
+                      _ ->
+                          internal_error("Interceptor: more than one "
+                                         "module handles ~p~n", [Overlap])
+                  end,
+                  sets:union(Set, UnionSoFar)
+          end,
+          sets:new(),
+          Sets),
+    ok.
+
+%% Run an inbound method and its content through each interceptor in
+%% turn, threading the possibly rewritten {Method, Content} pair.
+intercept_in(Method, Content, Mods) ->
+    lists:foldl(fun({Mod, ModState}, {M, C}) ->
+                        call_module(Mod, ModState, M, C)
+                end,
+                {Method, Content},
+                Mods).
+
+%% Invoke Mod:intercept/3 and validate its result. Mod might be
+%% unloaded at any point, so an 'undef' raised by Mod:intercept itself
+%% means "no interception": the input passes through unchanged.
+%%
+%% Fix: the old-style `catch` here lost the stacktrace and turned any
+%% failure other than the matched undef (throws, exits, other errors)
+%% into an opaque case_clause error. Use try/catch and re-raise
+%% unrelated undefs with their original stacktrace instead.
+call_module(Mod, St, M, C) ->
+    try Mod:intercept(M, C, St) of
+        R -> validate_response(Mod, M, C, R)
+    catch
+        error:undef:Stacktrace ->
+            case Stacktrace of
+                [{Mod, intercept, _, _} | _] -> {M, C};
+                _ -> erlang:raise(error, undef, Stacktrace)
+            end
+    end.
+
+%% Accept an interceptor's {Method, Content} result only if the method
+%% record type is unchanged and content presence matches the input
+%% (none stays none, #content{} stays #content{}); otherwise raise an
+%% internal error naming the offending module.
+validate_response(Mod, M1, C1, R = {M2, C2}) ->
+ case {validate_method(M1, M2), validate_content(C1, C2)} of
+ {true, true} -> R;
+ {false, _} ->
+ internal_error("Interceptor: ~p expected to return "
+ "method: ~p but returned: ~p",
+ [Mod, rabbit_misc:method_record_type(M1),
+ rabbit_misc:method_record_type(M2)]);
+ {_, false} ->
+ internal_error("Interceptor: ~p expected to return "
+ "content iff content is provided but "
+ "content in = ~p; content out = ~p",
+ [Mod, C1, C2])
+ end.
+
+%% True iff both method records are of the same AMQP method type.
+validate_method(M, M2) ->
+ rabbit_misc:method_record_type(M) =:= rabbit_misc:method_record_type(M2).
+
+%% Content presence must be preserved: none maps to none, a content
+%% record maps to a content record; any mix is invalid.
+validate_content(none, none) -> true;
+validate_content(#content{}, #content{}) -> true;
+validate_content(_, _) -> false.
+
+%% keep dialyzer happy
+%% Raise an internal_error protocol error with a formatted message;
+%% never returns.
+-spec internal_error(string(), [any()]) -> no_return().
+internal_error(Format, Args) ->
+ rabbit_misc:protocol_error(internal_error, Format, Args).
diff --git a/deps/rabbit/src/rabbit_channel_sup.erl b/deps/rabbit/src/rabbit_channel_sup.erl
new file mode 100644
index 0000000000..0d405ad3a7
--- /dev/null
+++ b/deps/rabbit/src/rabbit_channel_sup.erl
@@ -0,0 +1,92 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_channel_sup).
+
+%% Supervises processes that implement AMQP 0-9-1 channels:
+%%
+%% * Channel process itself
+%% * Network writer (for network connections)
+%% * Limiter (handles channel QoS and flow control)
+%%
+%% Every rabbit_channel_sup is supervised by rabbit_channel_sup_sup.
+%%
+%% See also rabbit_channel, rabbit_writer, rabbit_limiter.
+
+-behaviour(supervisor2).
+
+-export([start_link/1]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-export_type([start_link_args/0]).
+
+-type start_link_args() ::
+ {'tcp', rabbit_net:socket(), rabbit_channel:channel_number(),
+ non_neg_integer(), pid(), string(), rabbit_types:protocol(),
+ rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(),
+ pid()} |
+ {'direct', rabbit_channel:channel_number(), pid(), string(),
+ rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(),
+ rabbit_framing:amqp_table(), pid()}.
+
+-define(FAIR_WAIT, 70000).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(start_link_args()) -> {'ok', pid(), {pid(), any()}}.
+
+%% Start a channel supervision tree. For 'tcp' (network) channels the
+%% tree contains writer + limiter + channel and a protocol assembler
+%% state is returned; for 'direct' channels only limiter + channel are
+%% started and the assembler slot is 'none'. In both cases the limiter
+%% (and writer) children are created by init/1 from child_specs/1, and
+%% the channel child is added afterwards so it can be given their pids.
+start_link({tcp, Sock, Channel, FrameMax, ReaderPid, ConnName, Protocol, User,
+ VHost, Capabilities, Collector}) ->
+ {ok, SupPid} = supervisor2:start_link(
+ ?MODULE, {tcp, Sock, Channel, FrameMax,
+ ReaderPid, Protocol, {ConnName, Channel}}),
+ [LimiterPid] = supervisor2:find_child(SupPid, limiter),
+ [WriterPid] = supervisor2:find_child(SupPid, writer),
+ {ok, ChannelPid} =
+ supervisor2:start_child(
+ SupPid,
+ {channel, {rabbit_channel, start_link,
+ [Channel, ReaderPid, WriterPid, ReaderPid, ConnName,
+ Protocol, User, VHost, Capabilities, Collector,
+ LimiterPid]},
+ intrinsic, ?FAIR_WAIT, worker, [rabbit_channel]}),
+ {ok, AState} = rabbit_command_assembler:init(Protocol),
+ {ok, SupPid, {ChannelPid, AState}};
+start_link({direct, Channel, ClientChannelPid, ConnPid, ConnName, Protocol,
+ User, VHost, Capabilities, Collector, AmqpParams}) ->
+ {ok, SupPid} = supervisor2:start_link(
+ ?MODULE, {direct, {ConnName, Channel}}),
+ [LimiterPid] = supervisor2:find_child(SupPid, limiter),
+ {ok, ChannelPid} =
+ supervisor2:start_child(
+ SupPid,
+ {channel, {rabbit_channel, start_link,
+ [Channel, ClientChannelPid, ClientChannelPid, ConnPid,
+ ConnName, Protocol, User, VHost, Capabilities, Collector,
+ LimiterPid, AmqpParams]},
+ intrinsic, ?FAIR_WAIT, worker, [rabbit_channel]}),
+ {ok, SupPid, {ChannelPid, none}}.
+
+%%----------------------------------------------------------------------------
+
+%% supervisor2 callback: one_for_all with no restarts tolerated — any
+%% child death tears the whole channel tree down.
+init(Type) ->
+ ?LG_PROCESS_TYPE(channel_sup),
+ {ok, {{one_for_all, 0, 1}, child_specs(Type)}}.
+
+%% Initial child specs: a tcp tree gets a writer in front of the
+%% limiter; a direct tree gets only the limiter. The channel child is
+%% added later by start_link/1.
+child_specs({tcp, Sock, Channel, FrameMax, ReaderPid, Protocol, Identity}) ->
+ [{writer, {rabbit_writer, start_link,
+ [Sock, Channel, FrameMax, Protocol, ReaderPid, Identity, true]},
+ intrinsic, ?FAIR_WAIT, worker, [rabbit_writer]}
+ | child_specs({direct, Identity})];
+child_specs({direct, Identity}) ->
+ [{limiter, {rabbit_limiter, start_link, [Identity]},
+ transient, ?FAIR_WAIT, worker, [rabbit_limiter]}].
diff --git a/deps/rabbit/src/rabbit_channel_sup_sup.erl b/deps/rabbit/src/rabbit_channel_sup_sup.erl
new file mode 100644
index 0000000000..72cf38d6c8
--- /dev/null
+++ b/deps/rabbit/src/rabbit_channel_sup_sup.erl
@@ -0,0 +1,42 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_channel_sup_sup).
+
+%% Supervisor for AMQP 0-9-1 channels. Every AMQP 0-9-1 connection has
+%% one of these.
+%%
+%% See also rabbit_channel_sup, rabbit_connection_helper_sup, rabbit_reader.
+
+-behaviour(supervisor2).
+
+-export([start_link/0, start_channel/2]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+%% Start the per-connection channel supervisor (unregistered).
+start_link() ->
+ supervisor2:start_link(?MODULE, []).
+
+-spec start_channel(pid(), rabbit_channel_sup:start_link_args()) ->
+ {'ok', pid(), {pid(), any()}}.
+
+%% Start one rabbit_channel_sup tree under the given supervisor; Args
+%% is passed through to rabbit_channel_sup:start_link/1.
+start_channel(Pid, Args) ->
+ supervisor2:start_child(Pid, [Args]).
+
+%%----------------------------------------------------------------------------
+
+%% supervisor2 callback: simple_one_for_one of temporary
+%% rabbit_channel_sup children, no restarts tolerated.
+init([]) ->
+ ?LG_PROCESS_TYPE(channel_sup_sup),
+ {ok, {{simple_one_for_one, 0, 1},
+ [{channel_sup, {rabbit_channel_sup, start_link, []},
+ temporary, infinity, supervisor, [rabbit_channel_sup]}]}}.
diff --git a/deps/rabbit/src/rabbit_channel_tracking.erl b/deps/rabbit/src/rabbit_channel_tracking.erl
new file mode 100644
index 0000000000..42ab664a06
--- /dev/null
+++ b/deps/rabbit/src/rabbit_channel_tracking.erl
@@ -0,0 +1,291 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_channel_tracking).
+
+%% Abstracts away how tracked connection records are stored
+%% and queried.
+%%
+%% See also:
+%%
+%% * rabbit_channel_tracking_handler
+%% * rabbit_reader
+%% * rabbit_event
+-behaviour(rabbit_tracking).
+
+-export([boot/0,
+ update_tracked/1,
+ handle_cast/1,
+ register_tracked/1,
+ unregister_tracked/1,
+ count_tracked_items_in/1,
+ clear_tracking_tables/0,
+ shutdown_tracked_items/2]).
+
+-export([list/0, list_of_user/1, list_on_node/1,
+ tracked_channel_table_name_for/1,
+ tracked_channel_per_user_table_name_for/1,
+ get_all_tracked_channel_table_names_for_node/1,
+ delete_tracked_channel_user_entry/1]).
+
+-include_lib("rabbit.hrl").
+
+-import(rabbit_misc, [pget/2]).
+
+%%
+%% API
+%%
+
+%% Sets up and resets channel tracking tables for this node.
+-spec boot() -> ok.
+
+%% Create (or re-create) this node's channel tracking tables — the
+%% per-channel table and the per-user counter table — then reset them.
+boot() ->
+ ensure_tracked_channels_table_for_this_node(),
+ rabbit_log:info("Setting up a table for channel tracking on this node: ~p",
+ [tracked_channel_table_name_for(node())]),
+ ensure_per_user_tracked_channels_table_for_node(),
+ rabbit_log:info("Setting up a table for channel tracking on this node: ~p",
+ [tracked_channel_per_user_table_name_for(node())]),
+ clear_tracking_tables(),
+ ok.
+
+-spec update_tracked(term()) -> ok.
+
+%% Handle a tracking event asynchronously by spawning handle_cast/1 in
+%% a throwaway process, so the caller is never blocked by mnesia.
+update_tracked(Event) ->
+ spawn(?MODULE, handle_cast, [Event]),
+ ok.
+
+%% Asynchronously handle update events
+-spec handle_cast(term()) -> ok.
+
+%% Process one tracking event. channel_created/channel_closed are only
+%% acted upon for channels local to this node; connection_closed shuts
+%% down all channels of the dead connection; user_deleted schedules a
+%% delayed cleanup of the per-user counter; node_deleted drops the
+%% departed node's tracking tables.
+handle_cast({channel_created, Details}) ->
+ ThisNode = node(),
+ case node(pget(pid, Details)) of
+ ThisNode ->
+ TrackedCh = #tracked_channel{id = TrackedChId} =
+ tracked_channel_from_channel_created_event(Details),
+ try
+ register_tracked(TrackedCh)
+ catch
+ error:{no_exists, _} ->
+ Msg = "Could not register channel ~p for tracking, "
+ "its table is not ready yet or the channel terminated prematurely",
+ rabbit_log_connection:warning(Msg, [TrackedChId]),
+ ok;
+ error:Err ->
+ Msg = "Could not register channel ~p for tracking: ~p",
+ rabbit_log_connection:warning(Msg, [TrackedChId, Err]),
+ ok
+ end;
+ _OtherNode ->
+ %% ignore
+ ok
+ end;
+handle_cast({channel_closed, Details}) ->
+ %% channel has terminated, unregister iff local
+ case get_tracked_channel_by_pid(pget(pid, Details)) of
+ [#tracked_channel{name = Name}] ->
+ unregister_tracked(rabbit_tracking:id(node(), Name));
+ _Other -> ok
+ end;
+handle_cast({connection_closed, ConnDetails}) ->
+ ThisNode= node(),
+ ConnPid = pget(pid, ConnDetails),
+
+ case pget(node, ConnDetails) of
+ ThisNode ->
+ TrackedChs = get_tracked_channels_by_connection_pid(ConnPid),
+ rabbit_log_connection:info(
+ "Closing all channels from connection '~p' "
+ "because it has been closed", [pget(name, ConnDetails)]),
+ %% Shutting down channels will take care of unregistering the
+ %% corresponding tracking.
+ shutdown_tracked_items(TrackedChs, undefined),
+ ok;
+ _DifferentNode ->
+ ok
+ end;
+handle_cast({user_deleted, Details}) ->
+ Username = pget(name, Details),
+ %% Schedule user entry deletion, allowing time for connections to close
+ _ = timer:apply_after(?TRACKING_EXECUTION_TIMEOUT, ?MODULE,
+ delete_tracked_channel_user_entry, [Username]),
+ ok;
+handle_cast({node_deleted, Details}) ->
+ Node = pget(node, Details),
+ rabbit_log_connection:info(
+ "Node '~s' was removed from the cluster, deleting"
+ " its channel tracking tables...", [Node]),
+ delete_tracked_channels_table_for_node(Node),
+ delete_per_user_tracked_channels_table_for_node(Node).
+
+-spec register_tracked(rabbit_types:tracked_channel()) -> ok.
+-dialyzer([{nowarn_function, [register_tracked/1]}, race_conditions]).
+
+%% Record a tracked channel (dirty mnesia upsert). Only a first write
+%% bumps the per-user channel counter; a repeat registration is a no-op
+%% so the counter is not double-incremented.
+register_tracked(TrackedCh =
+ #tracked_channel{node = Node, name = Name, username = Username}) ->
+ ChId = rabbit_tracking:id(Node, Name),
+ TableName = tracked_channel_table_name_for(Node),
+ PerUserChTableName = tracked_channel_per_user_table_name_for(Node),
+ %% upsert
+ case mnesia:dirty_read(TableName, ChId) of
+ [] ->
+ mnesia:dirty_write(TableName, TrackedCh),
+ mnesia:dirty_update_counter(PerUserChTableName, Username, 1);
+ [#tracked_channel{}] ->
+ ok
+ end,
+ ok.
+
+-spec unregister_tracked(rabbit_types:tracked_channel_id()) -> ok.
+
+%% Remove a tracked channel (local node only, enforced by the guard)
+%% and decrement its user's channel counter; unknown ids are ignored.
+unregister_tracked(ChId = {Node, _Name}) when Node =:= node() ->
+ TableName = tracked_channel_table_name_for(Node),
+ PerUserChannelTableName = tracked_channel_per_user_table_name_for(Node),
+ case mnesia:dirty_read(TableName, ChId) of
+ [] -> ok;
+ [#tracked_channel{username = Username}] ->
+ mnesia:dirty_update_counter(PerUserChannelTableName, Username, -1),
+ mnesia:dirty_delete(TableName, ChId)
+ end.
+
+-spec count_tracked_items_in({atom(), rabbit_types:username()}) -> non_neg_integer().
+
+%% Number of channels tracked for the given user, summed over the
+%% per-user counter tables.
+%%
+%% Fix: the human-readable context string passed to rabbit_tracking
+%% said "channels in vhost" — a copy-paste from the per-vhost
+%% connection counter — although this counter is keyed by user.
+count_tracked_items_in({user, Username}) ->
+    rabbit_tracking:count_tracked_items(
+      fun tracked_channel_per_user_table_name_for/1,
+      #tracked_channel_per_user.channel_count, Username,
+      "channels of user").
+
+-spec clear_tracking_tables() -> ok.
+
+%% Empty this node's channel tracking tables.
+clear_tracking_tables() ->
+ clear_tracked_channel_tables_for_this_node(),
+ ok.
+
+-spec shutdown_tracked_items(list(), term()) -> ok.
+
+%% Close every channel in the given list of tracked channels; the
+%% second argument (extra args from rabbit_tracking) is unused here.
+shutdown_tracked_items(TrackedItems, _Args) ->
+    close_channels(TrackedItems).
+
+%% helper functions
+-spec list() -> [rabbit_types:tracked_channel()].
+
+%% All tracked channels across every currently-running cluster node.
+list() ->
+    lists:append(
+      [mnesia:dirty_match_object(tracked_channel_table_name_for(Node),
+                                 #tracked_channel{_ = '_'})
+       || Node <- rabbit_nodes:all_running()]).
+
+-spec list_of_user(rabbit_types:username()) -> [rabbit_types:tracked_channel()].
+
+%% All tracked channels opened by the given user, cluster-wide.
+list_of_user(Username) ->
+    Pattern = #tracked_channel{username = Username, _ = '_'},
+    rabbit_tracking:match_tracked_items(
+      fun tracked_channel_table_name_for/1, Pattern).
+
+-spec list_on_node(node()) -> [rabbit_types:tracked_channel()].
+
+%% All channels tracked on the given node. A missing tracking table
+%% (node never created one, or it was already deleted) yields [].
+list_on_node(Node) ->
+    Tab = tracked_channel_table_name_for(Node),
+    try
+        mnesia:dirty_match_object(Tab, #tracked_channel{_ = '_'})
+    catch
+        exit:{aborted, {no_exists, _}} -> []
+    end.
+
+-spec tracked_channel_table_name_for(node()) -> atom().
+
+%% Name of the per-node mnesia table holding tracked channels,
+%% e.g. 'tracked_channel_on_node_rabbit@host'. Node names are a small,
+%% bounded set, so building atoms here is safe.
+tracked_channel_table_name_for(Node) ->
+    list_to_atom("tracked_channel_on_node_" ++ atom_to_list(Node)).
+
+-spec tracked_channel_per_user_table_name_for(node()) -> atom().
+
+%% Name of the per-node mnesia table holding per-user channel counts.
+tracked_channel_per_user_table_name_for(Node) ->
+    list_to_atom("tracked_channel_table_per_user_on_node_" ++
+                     atom_to_list(Node)).
+
+%% internal
+
+%% Create (if needed) the tracked-channel table for the local node.
+ensure_tracked_channels_table_for_this_node() ->
+    ensure_tracked_channels_table_for_node(node()).
+
+%% Create (if needed) the per-user channel-count table for the local
+%% node. NOTE(review): despite the "_for_node" suffix this zero-arity
+%% function is the "this node" variant (compare
+%% ensure_tracked_channels_table_for_this_node/0 above); consider
+%% renaming for consistency once callers are audited.
+ensure_per_user_tracked_channels_table_for_node() ->
+    ensure_per_user_tracked_channels_table_for_node(node()).
+
+%% Create tables
+
+%% Create the mnesia table that stores tracked channels for Node.
+%% Failures other than "already exists" are logged and swallowed on
+%% purpose: tracking is best-effort and must not block node boot.
+ensure_tracked_channels_table_for_node(Node) ->
+    TableName = tracked_channel_table_name_for(Node),
+    case mnesia:create_table(TableName, [{record_name, tracked_channel},
+                                         {attributes, record_info(fields, tracked_channel)}]) of
+        {atomic, ok} -> ok;
+        {aborted, {already_exists, _}} -> ok;
+        {aborted, Error} ->
+            rabbit_log:error("Failed to create a tracked channel table for node ~p: ~p", [Node, Error]),
+            ok
+    end.
+
+%% Create the mnesia table that stores per-user channel counts for
+%% Node. As above, creation errors are logged and swallowed so that
+%% tracking problems never prevent startup.
+ensure_per_user_tracked_channels_table_for_node(Node) ->
+    TableName = tracked_channel_per_user_table_name_for(Node),
+    case mnesia:create_table(TableName, [{record_name, tracked_channel_per_user},
+                                         {attributes, record_info(fields, tracked_channel_per_user)}]) of
+        {atomic, ok} -> ok;
+        {aborted, {already_exists, _}} -> ok;
+        {aborted, Error} ->
+            rabbit_log:error("Failed to create a per-user tracked channel table for node ~p: ~p", [Node, Error]),
+            ok
+    end.
+
+%% Wipe the contents of both tracking tables owned by this node.
+clear_tracked_channel_tables_for_this_node() ->
+    Tables = get_all_tracked_channel_table_names_for_node(node()),
+    [rabbit_tracking:clear_tracking_table(T) || T <- Tables].
+
+%% Drop the tracked-channel table that belonged to the given node.
+delete_tracked_channels_table_for_node(Node) ->
+    rabbit_tracking:delete_tracking_table(
+      tracked_channel_table_name_for(Node), Node, "tracked channel").
+
+%% Drop the per-user channel-count table that belonged to the node.
+delete_per_user_tracked_channels_table_for_node(Node) ->
+    rabbit_tracking:delete_tracking_table(
+      tracked_channel_per_user_table_name_for(Node), Node,
+      "per-user tracked channels").
+
+%% Both tracking table names (channels + per-user counters) for Node.
+get_all_tracked_channel_table_names_for_node(Node) ->
+    [tracked_channel_table_name_for(Node),
+     tracked_channel_per_user_table_name_for(Node)].
+
+%% All tracked channels that belong to the given connection pid.
+get_tracked_channels_by_connection_pid(ConnPid) ->
+    Pattern = #tracked_channel{connection = ConnPid, _ = '_'},
+    rabbit_tracking:match_tracked_items(
+      fun tracked_channel_table_name_for/1, Pattern).
+
+%% The tracked channel(s) whose channel process is ChPid.
+get_tracked_channel_by_pid(ChPid) ->
+    Pattern = #tracked_channel{pid = ChPid, _ = '_'},
+    rabbit_tracking:match_tracked_items(
+      fun tracked_channel_table_name_for/1, Pattern).
+
+%% Remove the per-user counter row for Username, but only if the user
+%% no longer exists: the MFA below is the existence check evaluated by
+%% rabbit_tracking before deleting the entry.
+delete_tracked_channel_user_entry(Username) ->
+    rabbit_tracking:delete_tracked_entry(
+        {rabbit_auth_backend_internal, exists, [Username]},
+        fun tracked_channel_per_user_table_name_for/1,
+        Username).
+
+%% Build a #tracked_channel{} record from the proplist carried by a
+%% 'channel_created' event. The home node is derived from the channel
+%% pid itself.
+tracked_channel_from_channel_created_event(Details) ->
+    ChPid = pget(pid, Details),
+    Node = node(ChPid),
+    Name = pget(name, Details),
+    #tracked_channel{
+       id         = rabbit_tracking:id(Node, Name),
+       name       = Name,
+       node       = Node,
+       vhost      = pget(vhost, Details),
+       pid        = ChPid,
+       connection = pget(connection, Details),
+       username   = pget(user, Details)}.
+
+%% Shut down every channel process in the list; no-op on [].
+close_channels(Channels = [#tracked_channel{} | _]) ->
+    %% the generator pattern deliberately skips any element that is
+    %% not a #tracked_channel{} record
+    _ = [rabbit_channel:shutdown(Pid)
+         || #tracked_channel{pid = Pid} <- Channels],
+    ok;
+close_channels([]) -> ok.
diff --git a/deps/rabbit/src/rabbit_channel_tracking_handler.erl b/deps/rabbit/src/rabbit_channel_tracking_handler.erl
new file mode 100644
index 0000000000..0cbe02f39e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_channel_tracking_handler.erl
@@ -0,0 +1,71 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_channel_tracking_handler).
+
+%% This module keeps track of channel creation and termination events
+%% on its local node. Similar to the rabbit_connection_tracking_handler,
+%% the primary goal here is to decouple channel tracking from rabbit_reader
+%% and isolate channel tracking to its own process to avoid blocking connection
+%% creation events. Additionally, creation events are also non-blocking in that
+%% they spawn a short-lived process for updating the tracking tables in real time.
+%%
+%% Events from other nodes are ignored.
+
+-behaviour(gen_event).
+
+-export([init/1, handle_call/2, handle_event/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-include_lib("rabbit.hrl").
+
+-rabbit_boot_step({?MODULE,
+ [{description, "channel tracking event handler"},
+ {mfa, {gen_event, add_handler,
+ [rabbit_event, ?MODULE, []]}},
+ {cleanup, {gen_event, delete_handler,
+ [rabbit_event, ?MODULE, []]}},
+ {requires, [channel_tracking]},
+ {enables, recovery}]}).
+
+%%
+%% API
+%%
+
+%% gen_event callback: this handler keeps no state.
+init([]) ->
+    {ok, []}.
+
+%% Forward the lifecycle events the channel tracker cares about -
+%% channel, connection, user and cluster-node events - and ignore
+%% everything else.
+handle_event(#event{type = Type, props = Details}, State)
+  when Type =:= channel_created;
+       Type =:= channel_closed;
+       Type =:= connection_closed;
+       Type =:= user_deleted;
+       %% a node has been removed from the cluster
+       Type =:= node_deleted ->
+    ok = rabbit_channel_tracking:update_tracked({Type, Details}),
+    {ok, State};
+handle_event(_Event, State) ->
+    {ok, State}.
+
+%% No synchronous requests are supported by this handler.
+handle_call(_Request, State) ->
+    {ok, not_understood, State}.
+
+%% Drain any stray messages so the handler's mailbox cannot grow.
+handle_info(_Info, State) ->
+    {ok, State}.
+
+%% Nothing to clean up on handler removal.
+terminate(_Arg, _State) ->
+    ok.
+
+%% State format is unchanged across upgrades.
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
diff --git a/deps/rabbit/src/rabbit_classic_queue.erl b/deps/rabbit/src/rabbit_classic_queue.erl
new file mode 100644
index 0000000000..e53c0aecc2
--- /dev/null
+++ b/deps/rabbit/src/rabbit_classic_queue.erl
@@ -0,0 +1,527 @@
+-module(rabbit_classic_queue).
+-behaviour(rabbit_queue_type).
+
+-include("amqqueue.hrl").
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-record(msg_status, {pending :: [pid()],
+ confirmed = [] :: [pid()]}).
+
+-record(?MODULE, {pid :: undefined | pid(), %% the current master pid
+ qref :: term(), %% TODO
+ unconfirmed = #{} ::
+ #{non_neg_integer() => #msg_status{}}}).
+-define(STATE, ?MODULE).
+
+-opaque state() :: #?STATE{}.
+
+-export_type([state/0]).
+
+-export([
+ is_enabled/0,
+ declare/2,
+ delete/4,
+ is_recoverable/1,
+ recover/2,
+ purge/1,
+ policy_changed/1,
+ stat/1,
+ init/1,
+ close/1,
+ update/2,
+ consume/3,
+ cancel/5,
+ handle_event/2,
+ deliver/2,
+ settle/4,
+ credit/4,
+ dequeue/4,
+ info/2,
+ state_info/1,
+ capabilities/0
+ ]).
+
+-export([delete_crashed/1,
+ delete_crashed/2,
+ delete_crashed_internal/2]).
+
+-export([confirm_to_sender/3,
+ send_rejection/3,
+ send_queue_event/3]).
+
+is_enabled() -> true.
+
+%% Declare a classic queue, picking the node that will host the queue
+%% master: an explicit {ignore_location, Node} wins, otherwise the
+%% queue-master locator is consulted with the requested node as
+%% fallback. The queue process is then started under the vhost
+%% supervisor on that node.
+declare(Q, Node) when ?amqqueue_is_classic(Q) ->
+    QName = amqqueue:get_name(Q),
+    VHost = amqqueue:get_vhost(Q),
+    Node1 = case Node of
+                {ignore_location, Node0} ->
+                    Node0;
+                _ ->
+                    case rabbit_queue_master_location_misc:get_location(Q) of
+                        {ok, Node0} -> Node0;
+                        _ -> Node
+                    end
+            end,
+    %% NOTE(review): Node1 is already bound, so this is a match
+    %% assertion, not a rebinding - it will crash with badmatch if
+    %% initial_queue_node/2 ever returns a different node. Confirm
+    %% this is intentional rather than a lost rebinding to Node2.
+    Node1 = rabbit_mirror_queue_misc:initial_queue_node(Q, Node1),
+    case rabbit_vhost_sup_sup:get_vhost_sup(VHost, Node1) of
+        {ok, _} ->
+            gen_server2:call(
+              rabbit_amqqueue_sup_sup:start_queue_process(Node1, Q, declare),
+              {init, new}, infinity);
+        {error, Error} ->
+            {protocol_error, internal_error, "Cannot declare a queue '~s' on node '~s': ~255p",
+             [rabbit_misc:rs(QName), Node1, Error]}
+    end.
+
+%% Delete a classic queue. If a master (or a promotable mirror) is
+%% alive, the delete is delegated to the queue process; if every
+%% process for the queue is down, the queue is either force-deleted
+%% or refused, depending on whether the caller asked for if-empty
+%% semantics (emptiness cannot be verified without a live process).
+delete(Q, IfUnused, IfEmpty, ActingUser) when ?amqqueue_is_classic(Q) ->
+    case wait_for_promoted_or_stopped(Q) of
+        {promoted, Q1} ->
+            QPid = amqqueue:get_pid(Q1),
+            delegate:invoke(QPid, {gen_server2, call,
+                                   [{delete, IfUnused, IfEmpty, ActingUser},
+                                    infinity]});
+        {stopped, Q1} ->
+            #resource{name = Name, virtual_host = Vhost} = amqqueue:get_name(Q1),
+            case IfEmpty of
+                true ->
+                    rabbit_log:error("Queue ~s in vhost ~s has its master node down and "
+                                     "no mirrors available or eligible for promotion. "
+                                     "The queue may be non-empty. "
+                                     "Refusing to force-delete.",
+                                     [Name, Vhost]),
+                    {error, not_empty};
+                false ->
+                    %% message fixed: previously read "has its master
+                    %% node is down" (mixed two phrasings); now matches
+                    %% the error message in the clause above
+                    rabbit_log:warning("Queue ~s in vhost ~s has its master node down and "
+                                       "no mirrors available or eligible for promotion. "
+                                       "Forcing queue deletion.",
+                                       [Name, Vhost]),
+                    delete_crashed_internal(Q1, ActingUser),
+                    {ok, 0}
+            end;
+        {error, not_found} ->
+            %% Assume the queue was deleted
+            {ok, 0}
+    end.
+
+%% True when this node should restart the queue process during
+%% recovery: the queue's home is this node and either its metadata
+%% record is gone or the recorded pid is no longer alive.
+%% NOTE(review): mnesia:read/3 here implies this runs inside an mnesia
+%% transaction/activity set up by the caller - confirm against callers.
+is_recoverable(Q) when ?is_amqqueue(Q) ->
+    Node = node(),
+    Node =:= node(amqqueue:get_pid(Q)) andalso
+    %% Terminations on node down will not remove the rabbit_queue
+    %% record if it is a mirrored queue (such info is now obtained from
+    %% the policy). Thus, we must check if the local pid is alive
+    %% - if the record is present - in order to restart.
+            (mnesia:read(rabbit_queue, amqqueue:get_name(Q), read) =:= []
+            orelse not rabbit_mnesia:is_process_alive(amqqueue:get_pid(Q))).
+
+%% Recover the given durable queues in VHost: start the backing queue
+%% store (obtaining per-queue recovery terms), make sure the vhost's
+%% queue supervisor is running, then restart each queue process.
+%% Returns {Recovered, Failed} queue lists.
+recover(VHost, Queues) ->
+    {ok, BQ} = application:get_env(rabbit, backing_queue_module),
+    %% We rely on BQ:start/1 returning the recovery terms in the same
+    %% order as the supplied queue names, so that we can zip them together
+    %% for further processing in recover_durable_queues.
+    {ok, OrderedRecoveryTerms} =
+        BQ:start(VHost, [amqqueue:get_name(Q) || Q <- Queues]),
+    case rabbit_amqqueue_sup_sup:start_for_vhost(VHost) of
+        {ok, _}         ->
+            RecoveredQs = recover_durable_queues(lists:zip(Queues,
+                                                           OrderedRecoveryTerms)),
+            RecoveredNames = [amqqueue:get_name(Q) || Q <- RecoveredQs],
+            FailedQueues = [Q || Q <- Queues,
+                                 not lists:member(amqqueue:get_name(Q), RecoveredNames)],
+            {RecoveredQs, FailedQueues};
+        {error, Reason} ->
+            rabbit_log:error("Failed to start queue supervisor for vhost '~s': ~s", [VHost, Reason]),
+            throw({error, Reason})
+    end.
+
+-spec policy_changed(amqqueue:amqqueue()) -> ok.
+%% Asynchronously notify the queue process that its policy changed.
+policy_changed(Q) ->
+    gen_server2:cast(amqqueue:get_pid(Q), policy_changed).
+
+%% Synchronously fetch basic statistics from the queue process.
+stat(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    delegate:invoke(QPid, {gen_server2, call, [stat, infinity]}).
+
+-spec init(amqqueue:amqqueue()) -> state().
+%% Build the client-side state for a classic queue: cache the current
+%% master pid and use the queue name as the queue reference.
+init(Q) when ?amqqueue_is_classic(Q) ->
+    #?STATE{pid  = amqqueue:get_pid(Q),
+            qref = amqqueue:get_name(Q)}.
+
+-spec close(state()) -> ok.
+%% Classic queues hold no client-side resources to release.
+close(_State) ->
+    ok.
+
+-spec update(amqqueue:amqqueue(), state()) -> state().
+%% Refresh the cached master pid (e.g. after a failover); leaves the
+%% state untouched when the pid is unchanged.
+update(Q, #?STATE{pid = Pid} = State) when ?amqqueue_is_classic(Q) ->
+    NewPid = amqqueue:get_pid(Q),
+    if
+        NewPid =:= Pid -> State;
+        true           -> State#?STATE{pid = NewPid}
+    end.
+
+%% Register a consumer on the queue. The consume spec is a map built
+%% by the channel; on success the caller is asked (via the returned
+%% action) to monitor the queue pid.
+consume(Q, Spec, State) when ?amqqueue_is_classic(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    QRef = amqqueue:get_name(Q),
+    #{no_ack := NoAck,
+      channel_pid := ChPid,
+      limiter_pid := LimiterPid,
+      limiter_active := LimiterActive,
+      prefetch_count := ConsumerPrefetchCount,
+      consumer_tag := ConsumerTag,
+      exclusive_consume := ExclusiveConsume,
+      args := Args,
+      ok_msg := OkMsg,
+      acting_user :=  ActingUser} = Spec,
+    case delegate:invoke(QPid,
+                         {gen_server2, call,
+                          [{basic_consume, NoAck, ChPid, LimiterPid,
+                            LimiterActive, ConsumerPrefetchCount, ConsumerTag,
+                            ExclusiveConsume, Args, OkMsg, ActingUser},
+                           infinity]}) of
+        ok ->
+            %% ask the host process to monitor this pid
+            %% TODO: track pids as they change
+            {ok, State#?STATE{pid = QPid}, [{monitor, QPid, QRef}]};
+        Err ->
+            Err
+    end.
+
+%% Cancel a consumer; returns {ok, State} on success and propagates
+%% any error from the queue process unchanged.
+cancel(Q, ConsumerTag, OkMsg, ActingUser, State) ->
+    Request = {basic_cancel, self(), ConsumerTag, OkMsg, ActingUser},
+    case delegate:invoke(amqqueue:get_pid(Q),
+                         {gen_server2, call, [Request, infinity]}) of
+        ok  -> {ok, State};
+        Err -> Err
+    end.
+
+-spec settle(rabbit_queue_type:settle_op(), rabbit_types:ctag(),
+             [non_neg_integer()], state()) ->
+    {state(), rabbit_queue_type:actions()}.
+%% Settle delivered messages: 'complete' acks them, anything else is a
+%% reject (with requeue when Op is 'requeue'). Both paths are
+%% fire-and-forget casts to the queue process.
+settle(complete, _CTag, MsgIds, #?STATE{pid = Pid} = State) ->
+    delegate:invoke_no_result(
+      Pid, {gen_server2, cast, [{ack, MsgIds, self()}]}),
+    {State, []};
+settle(Op, _CTag, MsgIds, #?STATE{pid = Pid} = State) ->
+    Requeue = Op == requeue,
+    ok = delegate:invoke_no_result(
+           Pid, {gen_server2, cast, [{reject, Requeue, MsgIds, self()}]}),
+    {State, []}.
+
+%% Grant credit to a consumer (credit-based flow control), async.
+credit(CTag, Credit, Drain, #?STATE{pid = Pid} = State) ->
+    delegate:invoke_no_result(
+      Pid, {gen_server2, cast, [{credit, self(), CTag, Credit, Drain}]}),
+    {State, []}.
+
+%% Handle events coming back from the queue (and its mirrors):
+%% confirms, publish rejections, member DOWNs and credit replies.
+handle_event({confirm, MsgSeqNos, Pid}, #?STATE{qref = QRef,
+                                                unconfirmed = U0} = State) ->
+    %% confirms should never result in rejections
+    {Unconfirmed, ConfirmedSeqNos, []} =
+        settle_seq_nos(MsgSeqNos, Pid, U0, confirm),
+    Actions = [{settled, QRef, ConfirmedSeqNos}],
+    %% handle confirm event from queues
+    %% in this case the classic queue should track each individual publish and
+    %% the processes involved and only emit a settle action once they have all
+    %% been received (or DOWN has been received).
+    %% Hence this part of the confirm logic is queue specific.
+    {ok, State#?STATE{unconfirmed = Unconfirmed}, Actions};
+handle_event({reject_publish, SeqNo, _QPid},
+             #?STATE{qref = QRef,
+                     unconfirmed = U0} = State) ->
+    %% It does not matter which queue rejected the message,
+    %% if any queue did, it should not be confirmed.
+    {U, Rejected} = reject_seq_no(SeqNo, U0),
+    Actions = [{rejected, QRef, Rejected}],
+    {ok, State#?STATE{unconfirmed = U}, Actions};
+handle_event({down, Pid, Info}, #?STATE{qref = QRef,
+                                        pid = MasterPid,
+                                        unconfirmed = U0} = State0) ->
+    %% report the queue as down only when the master itself went away
+    Actions0 = case Pid =:= MasterPid of
+                   true ->
+                       [{queue_down, QRef}];
+                   false ->
+                       []
+               end,
+    case rabbit_misc:is_abnormal_exit(Info) of
+        false when Info =:= normal andalso Pid == MasterPid ->
+            %% queue was deleted and masterpid is down
+            eol;
+        false ->
+            %% this assumes the mirror isn't part of the active set
+            MsgSeqNos = maps:keys(
+                          maps:filter(fun (_, #msg_status{pending = Pids}) ->
+                                              lists:member(Pid, Pids)
+                                      end, U0)),
+            {Unconfirmed, Settled, Rejected} =
+                settle_seq_nos(MsgSeqNos, Pid, U0, down),
+            Actions = settlement_action(
+                        settled, QRef, Settled,
+                        settlement_action(rejected, QRef, Rejected, Actions0)),
+            {ok, State0#?STATE{unconfirmed = Unconfirmed}, Actions};
+        true ->
+            %% any abnormal exit should be considered a full reject of the
+            %% outstanding message ids - If the message didn't get to all
+            %% mirrors we have to assume it will never get there
+            MsgIds = maps:fold(
+                          fun (SeqNo, Status, Acc) ->
+                                  case lists:member(Pid, Status#msg_status.pending) of
+                                      true ->
+                                          [SeqNo | Acc];
+                                      false ->
+                                          Acc
+                                  end
+                          end, [], U0),
+            U = maps:without(MsgIds, U0),
+            {ok, State0#?STATE{unconfirmed = U},
+             [{rejected, QRef, MsgIds} | Actions0]}
+    end;
+handle_event({send_credit_reply, _} = Action, State) ->
+    {ok, State, [Action]}.
+
+%% Prepend a {Type, QRef, MsgSeqs} action, unless there is nothing
+%% to report.
+settlement_action(_Type, _QRef, [], Acc) ->
+    Acc;
+settlement_action(Type, QRef, MsgSeqs, Acc) ->
+    [{Type, QRef, MsgSeqs} | Acc].
+
+-spec deliver([{amqqueue:amqqueue(), state()}],
+              Delivery :: term()) ->
+    {[{amqqueue:amqqueue(), state()}], rabbit_queue_type:actions()}.
+%% Cast the delivery to every master and mirror pid. Masters receive
+%% the mandatory-flag variant 'false', mirrors 'true'.
+deliver(Qs0, #delivery{flow = Flow,
+                       msg_seq_no = MsgNo,
+                       message = #basic_message{exchange_name = _Ex},
+                       confirm = _Confirm} = Delivery) ->
+    %% TODO: record master and slaves for confirm processing
+    {MPids, SPids, Qs, Actions} = qpids(Qs0, MsgNo),
+    QPids = MPids ++ SPids,
+    case Flow of
+        %% Here we are tracking messages sent by the rabbit_channel
+        %% process. We are accessing the rabbit_channel process
+        %% dictionary.
+        %% NOTE(review): mirror pids (SPids) are in QPids too, so they
+        %% get credit_flow:send/1 twice - presumably because a mirror
+        %% handles both the direct cast below and the copy forwarded by
+        %% its master, consuming two credits. Confirm before changing.
+        flow   -> [credit_flow:send(QPid) || QPid <- QPids],
+                  [credit_flow:send(QPid) || QPid <- SPids];
+        noflow -> ok
+    end,
+    MMsg = {deliver, Delivery, false},
+    SMsg = {deliver, Delivery, true},
+    delegate:invoke_no_result(MPids, {gen_server2, cast, [MMsg]}),
+    delegate:invoke_no_result(SPids, {gen_server2, cast, [SMsg]}),
+    {Qs, Actions}.
+
+
+-spec dequeue(NoAck :: boolean(), LimiterPid :: pid(),
+              rabbit_types:ctag(), state()) ->
+    {ok, Count :: non_neg_integer(), rabbit_amqqueue:qmsg(), state()} |
+    {empty, state()}.
+%% Synchronous basic.get against the queue master; the consumer tag
+%% is unused for classic queues.
+dequeue(NoAck, LimiterPid, _CTag, #?STATE{pid = QPid} = State) ->
+    Request = {basic_get, self(), NoAck, LimiterPid},
+    case delegate:invoke(QPid, {gen_server2, call, [Request, infinity]}) of
+        {ok, Count, Msg} -> {ok, Count, Msg, State};
+        empty            -> {empty, State}
+    end.
+
+-spec state_info(state()) -> #{atom() := term()}.
+%% Classic queues expose no extra per-client state information.
+state_info(_State) ->
+    #{}.
+
+%% general queue info
+-spec info(amqqueue:amqqueue(), all_keys | rabbit_types:info_keys()) ->
+    rabbit_types:infos().
+%% Query info items from the queue process; 'all_keys' requests every
+%% item. Errors are mapped to an empty info list.
+info(Q, Items) ->
+    QPid = amqqueue:get_pid(Q),
+    Req = case Items of
+              all_keys -> info;
+              _ -> {info, Items}
+          end,
+    case delegate:invoke(QPid, {gen_server2, call, [Req, infinity]}) of
+        {ok, Result} ->
+            Result;
+        {error, _Err} ->
+            [];
+        Result when is_list(Result) ->
+            %% this is a backwards compatibility clause
+            Result
+    end.
+
+-spec purge(amqqueue:amqqueue()) ->
+    {ok, non_neg_integer()}.
+%% Drop all ready messages from the queue; returns the purged count.
+purge(Q) when ?is_amqqueue(Q) ->
+    delegate:invoke(amqqueue:get_pid(Q),
+                    {gen_server2, call, [purge, infinity]}).
+
+%% For each {queue, client-state} pair, collect the master pid, the
+%% mirror pids, monitor actions for all of them, and (when MsgNo is a
+%% publisher-confirm seq no) record the full pid set as pending in the
+%% client state. Returns {MasterPids, MirrorPids, UpdatedPairs, Actions}.
+qpids(Qs, MsgNo) ->
+    lists:foldl(
+      fun ({Q, S0}, {MPidAcc, SPidAcc, Qs0, Actions0}) ->
+              QPid = amqqueue:get_pid(Q),
+              SPids = amqqueue:get_slave_pids(Q),
+              QRef = amqqueue:get_name(Q),
+              Actions = [{monitor, QPid, QRef}
+                         | [{monitor, P, QRef} || P <- SPids]] ++ Actions0,
+              %% confirm record only if MsgNo isn't undefined
+              S = case S0 of
+                      #?STATE{unconfirmed = U0} ->
+                          Rec = [QPid | SPids],
+                          U = case MsgNo of
+                                  undefined ->
+                                      U0;
+                                  _ ->
+                                      U0#{MsgNo => #msg_status{pending = Rec}}
+                              end,
+                          S0#?STATE{pid = QPid,
+                                    unconfirmed = U};
+                      stateless ->
+                          %% a stateless caller tracks no confirms
+                          S0
+                  end,
+              {[QPid | MPidAcc], SPidAcc ++ SPids,
+               [{Q, S} | Qs0], Actions}
+      end, {[], [], [], []}, Qs).
+
+%% internal-ish
+-spec wait_for_promoted_or_stopped(amqqueue:amqqueue()) ->
+    {promoted, amqqueue:amqqueue()} |
+    {stopped, amqqueue:amqqueue()} |
+    {error, not_found}.
+%% Poll (100ms interval) until the queue either has a live master
+%% (possibly a freshly promoted mirror) or every member process is
+%% dead. Note there is no upper bound on the polling: this relies on
+%% promotion eventually completing or all members dying.
+wait_for_promoted_or_stopped(Q0) ->
+    QName = amqqueue:get_name(Q0),
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} ->
+            QPid = amqqueue:get_pid(Q),
+            SPids = amqqueue:get_slave_pids(Q),
+            case rabbit_mnesia:is_process_alive(QPid) of
+                true  -> {promoted, Q};
+                false ->
+                    case lists:any(fun(Pid) ->
+                                       rabbit_mnesia:is_process_alive(Pid)
+                                   end, SPids) of
+                        %% There is a live slave. May be promoted
+                        true ->
+                            timer:sleep(100),
+                            wait_for_promoted_or_stopped(Q);
+                        %% All slave pids are stopped.
+                        %% No process left for the queue
+                        false -> {stopped, Q}
+                    end
+            end;
+        {error, not_found} ->
+            {error, not_found}
+    end.
+
+-spec delete_crashed(amqqueue:amqqueue()) -> ok.
+%% Delete a crashed queue on behalf of the internal user.
+delete_crashed(Q) ->
+    delete_crashed(Q, ?INTERNAL_USER).
+
+%% Delete a crashed queue, running the actual cleanup on the queue's
+%% home node via rpc (its on-disk state lives there).
+delete_crashed(Q, ActingUser) ->
+    ok = rpc:call(amqqueue:qnode(Q), ?MODULE, delete_crashed_internal,
+                  [Q, ActingUser]).
+
+%% Remove a crashed queue's on-disk state via the backing queue module,
+%% then delete its metadata. Must run on the queue's home node.
+delete_crashed_internal(Q, ActingUser) ->
+    {ok, BQ} = application:get_env(rabbit, backing_queue_module),
+    BQ:delete_crashed(Q),
+    ok = rabbit_amqqueue:internal_delete(amqqueue:get_name(Q), ActingUser).
+
+%% Start a queue process per {queue, recovery-terms} pair and init
+%% them all in parallel via gen_server2:mcall/1; failures are logged
+%% and the successfully recovered queues returned.
+recover_durable_queues(QueuesAndRecoveryTerms) ->
+    {Results, Failures} =
+        gen_server2:mcall(
+          [{rabbit_amqqueue_sup_sup:start_queue_process(node(), Q, recovery),
+            {init, {self(), Terms}}} || {Q, Terms} <- QueuesAndRecoveryTerms]),
+    [rabbit_log:error("Queue ~p failed to initialise: ~p~n",
+                      [Pid, Error]) || {Pid, Error} <- Failures],
+    [Q || {_, {new, Q}} <- Results].
+
+%% Static capability map for classic queues: supported policies,
+%% x-arguments and consumer arguments, plus the fact that classic
+%% queues support server-generated names.
+capabilities() ->
+    #{policies => [<<"expires">>, <<"message-ttl">>, <<"dead-letter-exchange">>,
+                   <<"dead-letter-routing-key">>, <<"max-length">>,
+                   <<"max-length-bytes">>, <<"max-in-memory-length">>, <<"max-in-memory-bytes">>,
+                   <<"max-priority">>, <<"overflow">>, <<"queue-mode">>,
+                   <<"single-active-consumer">>, <<"delivery-limit">>,
+                   <<"ha-mode">>, <<"ha-params">>, <<"ha-sync-mode">>,
+                   <<"ha-promote-on-shutdown">>, <<"ha-promote-on-failure">>,
+                   <<"queue-master-locator">>],
+      queue_arguments => [<<"x-expires">>, <<"x-message-ttl">>, <<"x-dead-letter-exchange">>,
+                          <<"x-dead-letter-routing-key">>, <<"x-max-length">>,
+                          <<"x-max-length-bytes">>, <<"x-max-in-memory-length">>,
+                          <<"x-max-in-memory-bytes">>, <<"x-max-priority">>,
+                          <<"x-overflow">>, <<"x-queue-mode">>, <<"x-single-active-consumer">>,
+                          <<"x-queue-type">>, <<"x-queue-master-locator">>],
+      consumer_arguments => [<<"x-cancel-on-ha-failover">>,
+                             <<"x-priority">>, <<"x-credit">>
+                            ],
+      server_named => true}.
+
+%% Remove SeqNo from the unconfirmed map. Returns the updated map and
+%% the list of rejected seq nos ([SeqNo] when it was tracked, [] when
+%% it was unknown).
+reject_seq_no(SeqNo, U0) ->
+    reject_seq_no(SeqNo, U0, []).
+
+reject_seq_no(SeqNo, U0, Acc) ->
+    case maps:is_key(SeqNo, U0) of
+        true  -> {maps:remove(SeqNo, U0), [SeqNo | Acc]};
+        false -> {U0, Acc}
+    end.
+
+%% Apply a confirm or DOWN from Pid to each of MsgSeqNos, returning
+%% {Unconfirmed', Confirmed, Rejected}. A seq no is confirmed once no
+%% pids are pending and at least one confirmed; it is rejected when no
+%% pids are pending and none confirmed; otherwise it stays tracked.
+settle_seq_nos(MsgSeqNos, Pid, U0, Reason) ->
+    lists:foldl(
+      fun (SeqNo, {U, C0, R0}) ->
+              case U of
+                  #{SeqNo := Status0} ->
+                      case update_msg_status(Reason, Pid, Status0) of
+                          #msg_status{pending = [],
+                                      confirmed = []} ->
+                              %% no pending left and nothing confirmed
+                              %% then we reject it
+                              {maps:remove(SeqNo, U), C0, [SeqNo | R0]};
+                          #msg_status{pending = [],
+                                      confirmed = _} ->
+                              %% this can be confirmed as there are no pending
+                              %% and confirmed isn't empty
+                              {maps:remove(SeqNo, U), [SeqNo | C0], R0};
+                          MsgStatus ->
+                              {U#{SeqNo => MsgStatus}, C0, R0}
+                      end;
+                  _ ->
+                      %% untracked seq no: ignore
+                      {U, C0, R0}
+              end
+      end, {U0, [], []}, MsgSeqNos).
+
+%% Record Pid's response for one message: a confirm moves the pid
+%% from pending to confirmed, a DOWN simply drops it from pending.
+update_msg_status(confirm, Pid, #msg_status{pending = P, confirmed = C} = S) ->
+    S#msg_status{pending = P -- [Pid], confirmed = [Pid | C]};
+update_msg_status(down, Pid, #msg_status{pending = P} = S) ->
+    S#msg_status{pending = P -- [Pid]}.
+
+%% part of channel <-> queue api
+%% Send a publisher-confirm notification to the channel process. With
+%% the stream_queue feature flag enabled the newer queue_event wrapper
+%% is used; otherwise the legacy message shape.
+confirm_to_sender(Pid, QName, MsgSeqNos) ->
+    Confirm = {confirm, MsgSeqNos, self()},
+    Evt = case rabbit_ff_registry:is_enabled(stream_queue) of
+              true  -> {queue_event, QName, Confirm};
+              false -> Confirm
+          end,
+    gen_server2:cast(Pid, Evt).
+
+%% Send a publish rejection to the channel process, using the newer
+%% queue_event wrapper when the stream_queue feature flag is enabled.
+send_rejection(Pid, QName, MsgSeqNo) ->
+    Reject = {reject_publish, MsgSeqNo, self()},
+    Evt = case rabbit_ff_registry:is_enabled(stream_queue) of
+              true  -> {queue_event, QName, Reject};
+              false -> Reject
+          end,
+    gen_server2:cast(Pid, Evt).
+
+%% Forward an arbitrary queue event to the channel process.
+send_queue_event(Pid, QName, Evt) ->
+    gen_server2:cast(Pid, {queue_event, QName, Evt}).
diff --git a/deps/rabbit/src/rabbit_client_sup.erl b/deps/rabbit/src/rabbit_client_sup.erl
new file mode 100644
index 0000000000..a28e4ce39c
--- /dev/null
+++ b/deps/rabbit/src/rabbit_client_sup.erl
@@ -0,0 +1,43 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_client_sup).
+
+-behaviour(supervisor2).
+
+-export([start_link/1, start_link/2, start_link_worker/2]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(rabbit_types:mfargs()) ->
+          rabbit_types:ok_pid_or_error().
+
+%% Start an anonymous simple_one_for_one supervisor whose children
+%% are started via the given {M,F,A} callback.
+start_link(Callback) ->
+    supervisor2:start_link(?MODULE, Callback).
+
+-spec start_link({'local', atom()}, rabbit_types:mfargs()) ->
+          rabbit_types:ok_pid_or_error().
+
+%% Same as start_link/1 but registers the supervisor under SupName.
+start_link(SupName, Callback) ->
+    supervisor2:start_link(SupName, ?MODULE, Callback).
+
+-spec start_link_worker({'local', atom()}, rabbit_types:mfargs()) ->
+          rabbit_types:ok_pid_or_error().
+
+%% Like start_link/2, but children are supervised as workers (finite
+%% shutdown timeout) rather than as supervisors.
+start_link_worker(SupName, Callback) ->
+    supervisor2:start_link(SupName, ?MODULE, {Callback, worker}).
+
+%% Two child-spec shapes: by default each dynamically-added client
+%% child is treated as a supervisor (infinite shutdown); the
+%% {_, worker} variant supervises them as workers with the standard
+%% ?WORKER_WAIT shutdown timeout.
+init({M,F,A}) ->
+    {ok, {{simple_one_for_one, 0, 1},
+          [{client, {M,F,A}, temporary, infinity, supervisor, [M]}]}};
+init({{M,F,A}, worker}) ->
+    {ok, {{simple_one_for_one, 0, 1},
+          [{client, {M,F,A}, temporary, ?WORKER_WAIT, worker, [M]}]}}.
diff --git a/deps/rabbit/src/rabbit_config.erl b/deps/rabbit/src/rabbit_config.erl
new file mode 100644
index 0000000000..1198035a7a
--- /dev/null
+++ b/deps/rabbit/src/rabbit_config.erl
@@ -0,0 +1,46 @@
+-module(rabbit_config).
+
+-export([
+ config_files/0,
+ get_advanced_config/0
+ ]).
+
+-export([schema_dir/0]).
+-deprecated([{schema_dir, 0, eventually}]).
+
+-export_type([config_location/0]).
+
+-type config_location() :: string().
+
+%% Paths of the rabbitmq.conf-style files captured at prelaunch, or
+%% [] when the prelaunch state carries none (or is not a map with
+%% that key).
+get_confs() ->
+    case get_prelaunch_config_state() of
+        #{config_files := Confs} -> Confs;
+        _ -> []
+    end.
+
+%% Deprecated (see -deprecated attribute above): kept only for
+%% backwards compatibility and always returns 'undefined'.
+schema_dir() ->
+    undefined.
+
+%% Path of the advanced.config file recorded at prelaunch, or 'none'
+%% when it is unset or the file does not exist on disk.
+get_advanced_config() ->
+    case get_prelaunch_config_state() of
+        %% There can be only one advanced.config
+        #{config_advanced_file := FileName} when FileName =/= undefined ->
+            case rabbit_file:is_file(FileName) of
+                true -> FileName;
+                false -> none
+            end;
+        _ -> none
+    end.
+
+-spec config_files() -> [config_location()].
+%% Absolute paths of all effective configuration files: the single,
+%% optional advanced.config first, then every regular .conf file.
+config_files() ->
+    ConfFiles = [filename:absname(F)
+                 || F <- get_confs(), filelib:is_regular(F)],
+    case get_advanced_config() of
+        none     -> ConfFiles;
+        FileName -> [filename:absname(FileName) | ConfFiles]
+    end.
+
+%% The configuration state captured by the prelaunch phase.
+get_prelaunch_config_state() ->
+    rabbit_prelaunch_conf:get_config_state().
diff --git a/deps/rabbit/src/rabbit_confirms.erl b/deps/rabbit/src/rabbit_confirms.erl
new file mode 100644
index 0000000000..2fe032d1f1
--- /dev/null
+++ b/deps/rabbit/src/rabbit_confirms.erl
@@ -0,0 +1,152 @@
+-module(rabbit_confirms).
+
+-compile({no_auto_import, [size/1]}).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([init/0,
+ insert/4,
+ confirm/3,
+ reject/2,
+
+ remove_queue/2,
+
+ smallest/1,
+ size/1,
+ is_empty/1]).
+
+-type seq_no() :: non_neg_integer().
+-type queue_name() :: rabbit_amqqueue:name().
+-type exchange_name() :: rabbit_exchange:name().
+
+-record(?MODULE, {smallest :: undefined | seq_no(),
+ unconfirmed = #{} :: #{seq_no() =>
+ {exchange_name(),
+ #{queue_name() => ok}}}
+ }).
+
+-type mx() :: {seq_no(), exchange_name()}.
+
+-opaque state() :: #?MODULE{}.
+
+-export_type([
+ state/0
+ ]).
+
+-spec init() -> state().
+%% Fresh confirm-tracking state: nothing unconfirmed yet.
+init() ->
+    #?MODULE{}.
+
+-spec insert(seq_no(), [queue_name()], exchange_name(), state()) ->
+    state().
+%% Start tracking SeqNo, published to exchange XName and routed to
+%% QNames. The seq no must not already be tracked (guard-enforced);
+%% the smallest tracked seq no is initialised on the first insert.
+insert(SeqNo, QNames, #resource{kind = exchange} = XName,
+       #?MODULE{smallest = S0,
+                unconfirmed = U0} = State)
+  when is_integer(SeqNo)
+       andalso is_list(QNames)
+       andalso is_map_key(SeqNo, U0) == false ->
+    QueueMap = maps:from_list([{Q, ok} || Q <- QNames]),
+    Smallest = case S0 of
+                   undefined -> SeqNo;
+                   _         -> S0
+               end,
+    State#?MODULE{smallest = Smallest,
+                  unconfirmed = U0#{SeqNo => {XName, QueueMap}}}.
+
+-spec confirm([seq_no()], queue_name(), state()) ->
+    {[mx()], state()}.
+%% Record that QName has confirmed each of SeqNos. Returns the
+%% {SeqNo, ExchangeName} pairs that became fully confirmed (i.e. QName
+%% was the last queue pending for them) plus the updated state, with
+%% 'smallest' advanced if it was among the fully-confirmed entries.
+confirm(SeqNos, QName, #?MODULE{smallest = Smallest0,
+                                unconfirmed = U0} = State)
+  when is_list(SeqNos) ->
+    {Confirmed, U} = lists:foldr(
+                       fun (SeqNo, Acc) ->
+                               confirm_one(SeqNo, QName, Acc)
+                       end, {[], U0}, SeqNos),
+    %% check if smallest is in Confirmed
+    %% TODO: this can be optimised by checking in the preceding foldr
+    Smallest =
+        case lists:any(fun ({S, _}) -> S == Smallest0 end, Confirmed) of
+            true ->
+                %% work out new smallest
+                next_smallest(Smallest0, U);
+            false ->
+                Smallest0
+        end,
+    {Confirmed, State#?MODULE{smallest = Smallest,
+                              unconfirmed = U}}.
+
+-spec reject(seq_no(), state()) ->
+    {ok, mx(), state()} | {error, not_found}.
+%% Drop SeqNo from tracking (a negative acknowledgement). Returns the
+%% {SeqNo, ExchangeName} pair so the caller can nack the publisher;
+%% 'smallest' is re-derived when the rejected entry was the smallest.
+reject(SeqNo, #?MODULE{smallest = Smallest0,
+                       unconfirmed = U0} = State)
+  when is_integer(SeqNo) ->
+    case maps:take(SeqNo, U0) of
+        {{XName, _QS}, U} ->
+            Smallest = case SeqNo of
+                           Smallest0 ->
+                               %% need to scan as the smallest was removed
+                               next_smallest(Smallest0, U);
+                           _ ->
+                               Smallest0
+                       end,
+            {ok, {SeqNo, XName}, State#?MODULE{unconfirmed = U,
+                                               smallest = Smallest}};
+        error ->
+            {error, not_found}
+    end.
+
+%% idempotent
+-spec remove_queue(queue_name(), state()) ->
+    {[mx()], state()}.
+%% Treat QName as gone: confirm, on its behalf, every tracked seq no
+%% that was still awaiting it.
+remove_queue(QName, #?MODULE{unconfirmed = U} = State) ->
+    SeqNos = [SeqNo || {SeqNo, {_XName, QS}} <- maps:to_list(U),
+                       maps:is_key(QName, QS)],
+    confirm(lists:sort(SeqNos), QName, State).
+
+-spec smallest(state()) -> seq_no() | undefined.
+%% The smallest tracked seq no, or 'undefined' when none are tracked.
+smallest(#?MODULE{smallest = Smallest}) ->
+    Smallest.
+
+-spec size(state()) -> non_neg_integer().
+%% Number of seq nos currently awaiting confirmation.
+size(#?MODULE{unconfirmed = U}) ->
+    maps:size(U).
+
+-spec is_empty(state()) -> boolean().
+%% True when nothing awaits confirmation (calls the local size/1;
+%% auto-import of erlang:size/1 is disabled above).
+is_empty(State) ->
+    size(State) == 0.
+
+%% INTERNAL
+
+%% Mark SeqNo as confirmed by QName. When QName was the last pending
+%% queue for the seq no, emit {SeqNo, XName}; otherwise just shrink
+%% its pending-queue map. Unknown seq nos are ignored.
+confirm_one(SeqNo, QName, {Acc, U0}) ->
+    case maps:take(SeqNo, U0) of
+        {{XName, QS}, U1}
+          when is_map_key(QName, QS)
+               andalso map_size(QS) == 1 ->
+            %% last queue confirm
+            {[{SeqNo, XName} | Acc], U1};
+        {{XName, QS}, U1} ->
+            {Acc, U1#{SeqNo => {XName, maps:remove(QName, QS)}}};
+        error ->
+            {Acc, U0}
+    end.
+
+%% Find the next smallest tracked seq no at or after S by counting
+%% upwards. Correct because seq nos are inserted in increasing order,
+%% so some key >= S must exist whenever the map is non-empty.
+next_smallest(_S, U) when map_size(U) == 0 ->
+    undefined;
+next_smallest(S, U) when is_map_key(S, U) ->
+    S;
+next_smallest(S, U) ->
+    %% TODO: this is potentially infinitely recursive if called incorrectly
+    next_smallest(S+1, U).
+
+
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
diff --git a/deps/rabbit/src/rabbit_connection_helper_sup.erl b/deps/rabbit/src/rabbit_connection_helper_sup.erl
new file mode 100644
index 0000000000..d0509029fd
--- /dev/null
+++ b/deps/rabbit/src/rabbit_connection_helper_sup.erl
@@ -0,0 +1,57 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_connection_helper_sup).
+
+%% Supervises auxiliary processes of AMQP 0-9-1 connections:
+%%
+%% * Channel supervisor
+%% * Heartbeat receiver
+%% * Heartbeat sender
+%% * Exclusive queue collector
+%%
+%% See also rabbit_heartbeat, rabbit_channel_sup_sup, rabbit_queue_collector.
+
+-behaviour(supervisor2).
+
+-export([start_link/0]).
+-export([start_channel_sup_sup/1,
+ start_queue_collector/2]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+%% Start an (initially childless) helper supervisor for one connection.
+start_link() ->
+    supervisor2:start_link(?MODULE, []).
+
+-spec start_channel_sup_sup(pid()) -> rabbit_types:ok_pid_or_error().
+
+%% Add the channel supervisor-of-supervisors under the given helper
+%% sup; 'intrinsic' so its normal exit shuts the connection tree down.
+start_channel_sup_sup(SupPid) ->
+    supervisor2:start_child(
+      SupPid,
+      {channel_sup_sup, {rabbit_channel_sup_sup, start_link, []},
+       intrinsic, infinity, supervisor, [rabbit_channel_sup_sup]}).
+
+-spec start_queue_collector(pid(), rabbit_types:proc_name()) ->
+          rabbit_types:ok_pid_or_error().
+
+%% Add the exclusive-queue collector worker under the helper sup,
+%% identified by the connection's name for diagnostics.
+start_queue_collector(SupPid, Identity) ->
+    supervisor2:start_child(
+      SupPid,
+      {collector, {rabbit_queue_collector, start_link, [Identity]},
+       intrinsic, ?WORKER_WAIT, worker, [rabbit_queue_collector]}).
+
+%%----------------------------------------------------------------------------
+
+%% Children are added dynamically (see the start_* functions above).
+init([]) ->
+    ?LG_PROCESS_TYPE(connection_helper_sup),
+    {ok, {{one_for_one, 10, 10}, []}}.
diff --git a/deps/rabbit/src/rabbit_connection_sup.erl b/deps/rabbit/src/rabbit_connection_sup.erl
new file mode 100644
index 0000000000..c1d1bd0d77
--- /dev/null
+++ b/deps/rabbit/src/rabbit_connection_sup.erl
@@ -0,0 +1,66 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_connection_sup).
+
+%% Supervisor for a (network) AMQP 0-9-1 client connection.
+%%
+%% Supervises
+%%
+%% * rabbit_reader
+%% * Auxiliary process supervisor
+%%
+%% See also rabbit_reader, rabbit_connection_helper_sup.
+
+-behaviour(supervisor2).
+-behaviour(ranch_protocol).
+
+-export([start_link/4, reader/1]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(any(), rabbit_net:socket(), module(), any()) ->
+ {'ok', pid(), pid()}.
+
+%% ranch_protocol callback. Returns both the supervisor pid and the
+%% reader pid; ranch hands the socket over to the reader process.
+%% The socket/transport/options arguments are unused here - the reader
+%% obtains the socket via the ranch handshake using Ref.
+start_link(Ref, _Sock, _Transport, _Opts) ->
+ {ok, SupPid} = supervisor2:start_link(?MODULE, []),
+ %% We need to get channels in the hierarchy here so they get shut
+ %% down after the reader, so the reader gets a chance to terminate
+ %% them cleanly. But for 1.0 readers we can't start the real
+ %% ch_sup_sup (because we don't know if we will be 0-9-1 or 1.0) -
+ %% so we add another supervisor into the hierarchy.
+ %%
+ %% This supervisor also acts as an intermediary for heartbeaters and
+ %% the queue collector process, since these must not be siblings of the
+ %% reader due to the potential for deadlock if they are added/restarted
+ %% whilst the supervision tree is shutting down.
+ {ok, HelperSup} =
+ supervisor2:start_child(
+ SupPid,
+ {helper_sup, {rabbit_connection_helper_sup, start_link, []},
+ intrinsic, infinity, supervisor, [rabbit_connection_helper_sup]}),
+ {ok, ReaderPid} =
+ supervisor2:start_child(
+ SupPid,
+ {reader, {rabbit_reader, start_link, [HelperSup, Ref]},
+ intrinsic, ?WORKER_WAIT, worker, [rabbit_reader]}),
+ {ok, SupPid, ReaderPid}.
+
+-spec reader(pid()) -> pid().
+
+%% Returns the pid of the reader child of the given connection supervisor.
+reader(Pid) ->
+ hd(supervisor2:find_child(Pid, reader)).
+
+%%--------------------------------------------------------------------------
+
+%% one_for_all with zero allowed restarts: if any child dies the whole
+%% connection (supervisor and all children) is taken down.
+init([]) ->
+ ?LG_PROCESS_TYPE(connection_sup),
+ {ok, {{one_for_all, 0, 1}, []}}.
diff --git a/deps/rabbit/src/rabbit_connection_tracking.erl b/deps/rabbit/src/rabbit_connection_tracking.erl
new file mode 100644
index 0000000000..c0704e6a7c
--- /dev/null
+++ b/deps/rabbit/src/rabbit_connection_tracking.erl
@@ -0,0 +1,515 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_connection_tracking).
+
+%% Abstracts away how tracked connection records are stored
+%% and queried.
+%%
+%% See also:
+%%
+%% * rabbit_connection_tracking_handler
+%% * rabbit_reader
+%% * rabbit_event
+-behaviour(rabbit_tracking).
+
+-export([boot/0,
+ update_tracked/1,
+ handle_cast/1,
+ register_tracked/1,
+ unregister_tracked/1,
+ count_tracked_items_in/1,
+ clear_tracking_tables/0,
+ shutdown_tracked_items/2]).
+
+-export([ensure_tracked_connections_table_for_node/1,
+ ensure_per_vhost_tracked_connections_table_for_node/1,
+ ensure_per_user_tracked_connections_table_for_node/1,
+
+ ensure_tracked_connections_table_for_this_node/0,
+ ensure_per_vhost_tracked_connections_table_for_this_node/0,
+ ensure_per_user_tracked_connections_table_for_this_node/0,
+
+ tracked_connection_table_name_for/1,
+ tracked_connection_per_vhost_table_name_for/1,
+ tracked_connection_per_user_table_name_for/1,
+ get_all_tracked_connection_table_names_for_node/1,
+
+ delete_tracked_connections_table_for_node/1,
+ delete_per_vhost_tracked_connections_table_for_node/1,
+ delete_per_user_tracked_connections_table_for_node/1,
+ delete_tracked_connection_user_entry/1,
+ delete_tracked_connection_vhost_entry/1,
+
+ clear_tracked_connection_tables_for_this_node/0,
+
+ list/0, list/1, list_on_node/1, list_on_node/2, list_of_user/1,
+ tracked_connection_from_connection_created/1,
+ tracked_connection_from_connection_state/1,
+ lookup/1,
+ count/0]).
+
+-include_lib("rabbit.hrl").
+
+-import(rabbit_misc, [pget/2]).
+
+-export([close_connections/3]).
+
+%%
+%% API
+%%
+
+%% Behaviour callbacks
+
+-spec boot() -> ok.
+
+%% Sets up and resets connection tracking tables for this
+%% node. Creates the per-connection, per-vhost and per-user tables,
+%% then clears any stale rows left over from a previous run.
+boot() ->
+ ensure_tracked_connections_table_for_this_node(),
+ rabbit_log:info("Setting up a table for connection tracking on this node: ~p",
+ [tracked_connection_table_name_for(node())]),
+ ensure_per_vhost_tracked_connections_table_for_this_node(),
+ rabbit_log:info("Setting up a table for per-vhost connection counting on this node: ~p",
+ [tracked_connection_per_vhost_table_name_for(node())]),
+ ensure_per_user_tracked_connections_table_for_this_node(),
+ rabbit_log:info("Setting up a table for per-user connection counting on this node: ~p",
+ [tracked_connection_per_user_table_name_for(node())]),
+ clear_tracking_tables(),
+ ok.
+
+-spec update_tracked(term()) -> ok.
+
+%% Fire-and-forget: handles the event in a throwaway process so the
+%% caller (the rabbit_event handler) is never blocked.
+%% NOTE(review): bare spawn/3 means a crash in handle_cast/1 is
+%% unlinked and unreported - presumably intentional (best effort),
+%% but worth confirming. handle_cast/1 is exported for this spawn.
+update_tracked(Event) ->
+ spawn(?MODULE, handle_cast, [Event]),
+ ok.
+
+%% Asynchronously handle update events
+-spec handle_cast(term()) -> ok.
+
+%% A connection was opened: record it, but only if it belongs to this
+%% node (each node tracks only its own connections).
+handle_cast({connection_created, Details}) ->
+ ThisNode = node(),
+ case pget(node, Details) of
+ ThisNode ->
+ TConn = tracked_connection_from_connection_created(Details),
+ ConnId = TConn#tracked_connection.id,
+ try
+ register_tracked(TConn)
+ catch
+ error:{no_exists, _} ->
+ Msg = "Could not register connection ~p for tracking, "
+ "its table is not ready yet or the connection terminated prematurely",
+ rabbit_log_connection:warning(Msg, [ConnId]),
+ ok;
+ error:Err ->
+ Msg = "Could not register connection ~p for tracking: ~p",
+ rabbit_log_connection:warning(Msg, [ConnId, Err]),
+ ok
+ end;
+ _OtherNode ->
+ %% ignore
+ ok
+ end;
+%% A connection was closed: drop its row (and decrement the per-vhost
+%% and per-user counters) if it was tracked on this node.
+handle_cast({connection_closed, Details}) ->
+ ThisNode = node(),
+ case pget(node, Details) of
+ ThisNode ->
+ %% Example of the Details proplist:
+ %% [{name,<<"127.0.0.1:64078 -> 127.0.0.1:5672">>},
+ %% {pid,<0.1774.0>},
+ %% {node, rabbit@hostname}]
+ unregister_tracked(
+ rabbit_tracking:id(ThisNode, pget(name, Details)));
+ _OtherNode ->
+ %% ignore
+ ok
+ end;
+%% A vhost was deleted: close all of its connections cluster-wide.
+handle_cast({vhost_deleted, Details}) ->
+ VHost = pget(name, Details),
+ %% Schedule vhost entry deletion, allowing time for connections to close
+ _ = timer:apply_after(?TRACKING_EXECUTION_TIMEOUT, ?MODULE,
+ delete_tracked_connection_vhost_entry, [VHost]),
+ rabbit_log_connection:info("Closing all connections in vhost '~s' because it's being deleted", [VHost]),
+ shutdown_tracked_items(
+ rabbit_connection_tracking:list(VHost),
+ rabbit_misc:format("vhost '~s' is deleted", [VHost]));
+%% Note: under normal circumstances this will be called immediately
+%% after the vhost_deleted above. Therefore we should be careful about
+%% what we log and be more defensive.
+handle_cast({vhost_down, Details}) ->
+ VHost = pget(name, Details),
+ Node = pget(node, Details),
+ rabbit_log_connection:info("Closing all connections in vhost '~s' on node '~s'"
+ " because the vhost is stopping",
+ [VHost, Node]),
+ shutdown_tracked_items(
+ rabbit_connection_tracking:list_on_node(Node, VHost),
+ rabbit_misc:format("vhost '~s' is down", [VHost]));
+%% A user was deleted: close all of that user's connections.
+handle_cast({user_deleted, Details}) ->
+ Username = pget(name, Details),
+ %% Schedule user entry deletion, allowing time for connections to close
+ _ = timer:apply_after(?TRACKING_EXECUTION_TIMEOUT, ?MODULE,
+ delete_tracked_connection_user_entry, [Username]),
+ rabbit_log_connection:info("Closing all connections from user '~s' because it's being deleted", [Username]),
+ shutdown_tracked_items(
+ rabbit_connection_tracking:list_of_user(Username),
+ rabbit_misc:format("user '~s' is deleted", [Username]));
+%% A node has been deleted from the cluster: drop all three of its
+%% tracking tables.
+handle_cast({node_deleted, Details}) ->
+ Node = pget(node, Details),
+ rabbit_log_connection:info("Node '~s' was removed from the cluster, deleting its connection tracking tables...", [Node]),
+ delete_tracked_connections_table_for_node(Node),
+ delete_per_vhost_tracked_connections_table_for_node(Node),
+ delete_per_user_tracked_connections_table_for_node(Node).
+
+-spec register_tracked(rabbit_types:tracked_connection()) -> ok.
+%% The dirty read-then-write below is an acknowledged benign race;
+%% suppress the corresponding Dialyzer warning.
+-dialyzer([{nowarn_function, [register_tracked/1]}, race_conditions]).
+
+%% Inserts (upserts) a tracked-connection row into this node's table and
+%% bumps the per-vhost and per-user counters. Guard restricts this to
+%% records whose node is the local node.
+register_tracked(#tracked_connection{username = Username, vhost = VHost, id = ConnId, node = Node} = Conn) when Node =:= node() ->
+ TableName = tracked_connection_table_name_for(Node),
+ PerVhostTableName = tracked_connection_per_vhost_table_name_for(Node),
+ PerUserConnTableName = tracked_connection_per_user_table_name_for(Node),
+ %% upsert: only insert and count if the row is not already present
+ case mnesia:dirty_read(TableName, ConnId) of
+ [] ->
+ mnesia:dirty_write(TableName, Conn),
+ mnesia:dirty_update_counter(PerVhostTableName, VHost, 1),
+ mnesia:dirty_update_counter(PerUserConnTableName, Username, 1);
+ [#tracked_connection{}] ->
+ ok
+ end,
+ ok.
+
+-spec unregister_tracked(rabbit_types:tracked_connection_id()) -> ok.
+
+%% Removes a tracked-connection row and decrements the per-vhost and
+%% per-user counters; a no-op if the row does not exist.
+unregister_tracked(ConnId = {Node, _Name}) when Node =:= node() ->
+ TableName = tracked_connection_table_name_for(Node),
+ PerVhostTableName = tracked_connection_per_vhost_table_name_for(Node),
+ PerUserConnTableName = tracked_connection_per_user_table_name_for(Node),
+ case mnesia:dirty_read(TableName, ConnId) of
+ [] -> ok;
+ [#tracked_connection{vhost = VHost, username = Username}] ->
+ mnesia:dirty_update_counter(PerUserConnTableName, Username, -1),
+ mnesia:dirty_update_counter(PerVhostTableName, VHost, -1),
+ mnesia:dirty_delete(TableName, ConnId)
+ end.
+
+-spec count_tracked_items_in({atom(), rabbit_types:vhost()}) -> non_neg_integer().
+
+%% Cluster-wide connection count for a vhost or a user, summed from the
+%% per-node counter tables by rabbit_tracking.
+count_tracked_items_in({vhost, VirtualHost}) ->
+ rabbit_tracking:count_tracked_items(
+ fun tracked_connection_per_vhost_table_name_for/1,
+ #tracked_connection_per_vhost.connection_count, VirtualHost,
+ "connections in vhost");
+count_tracked_items_in({user, Username}) ->
+ rabbit_tracking:count_tracked_items(
+ fun tracked_connection_per_user_table_name_for/1,
+ #tracked_connection_per_user.connection_count, Username,
+ "connections for user").
+
+-spec clear_tracking_tables() -> ok.
+
+%% rabbit_tracking behaviour callback; clears this node's tables only.
+clear_tracking_tables() ->
+ clear_tracked_connection_tables_for_this_node().
+
+-spec shutdown_tracked_items(list(), term()) -> ok.
+
+%% rabbit_tracking behaviour callback; closes the given connections.
+shutdown_tracked_items(TrackedItems, Message) ->
+ close_connections(TrackedItems, Message).
+
+%% Extended API
+
+-spec ensure_tracked_connections_table_for_this_node() -> ok.
+
+ensure_tracked_connections_table_for_this_node() ->
+ ensure_tracked_connections_table_for_node(node()).
+
+
+-spec ensure_per_vhost_tracked_connections_table_for_this_node() -> ok.
+
+ensure_per_vhost_tracked_connections_table_for_this_node() ->
+ ensure_per_vhost_tracked_connections_table_for_node(node()).
+
+
+-spec ensure_per_user_tracked_connections_table_for_this_node() -> ok.
+
+ensure_per_user_tracked_connections_table_for_this_node() ->
+ ensure_per_user_tracked_connections_table_for_node(node()).
+
+
+%% Create tables. Each of the three ensure_* functions below is
+%% idempotent: already_exists is treated as success, and any other
+%% create_table failure is logged but still returns ok (tracking is
+%% best-effort and must not prevent node boot).
+-spec ensure_tracked_connections_table_for_node(node()) -> ok.
+
+ensure_tracked_connections_table_for_node(Node) ->
+ TableName = tracked_connection_table_name_for(Node),
+ case mnesia:create_table(TableName, [{record_name, tracked_connection},
+ {attributes, record_info(fields, tracked_connection)}]) of
+ {atomic, ok} -> ok;
+ {aborted, {already_exists, _}} -> ok;
+ {aborted, Error} ->
+ rabbit_log:error("Failed to create a tracked connection table for node ~p: ~p", [Node, Error]),
+ ok
+ end.
+
+-spec ensure_per_vhost_tracked_connections_table_for_node(node()) -> ok.
+
+ensure_per_vhost_tracked_connections_table_for_node(Node) ->
+ TableName = tracked_connection_per_vhost_table_name_for(Node),
+ case mnesia:create_table(TableName, [{record_name, tracked_connection_per_vhost},
+ {attributes, record_info(fields, tracked_connection_per_vhost)}]) of
+ {atomic, ok} -> ok;
+ {aborted, {already_exists, _}} -> ok;
+ {aborted, Error} ->
+ rabbit_log:error("Failed to create a per-vhost tracked connection table for node ~p: ~p", [Node, Error]),
+ ok
+ end.
+
+-spec ensure_per_user_tracked_connections_table_for_node(node()) -> ok.
+
+ensure_per_user_tracked_connections_table_for_node(Node) ->
+ TableName = tracked_connection_per_user_table_name_for(Node),
+ case mnesia:create_table(TableName, [{record_name, tracked_connection_per_user},
+ {attributes, record_info(fields, tracked_connection_per_user)}]) of
+ {atomic, ok} -> ok;
+ {aborted, {already_exists, _}} -> ok;
+ {aborted, Error} ->
+ rabbit_log:error("Failed to create a per-user tracked connection table for node ~p: ~p", [Node, Error]),
+ ok
+ end.
+
+-spec clear_tracked_connection_tables_for_this_node() -> ok.
+
+%% Empties (but does not delete) all three tracking tables on this node.
+clear_tracked_connection_tables_for_this_node() ->
+ [rabbit_tracking:clear_tracking_table(T)
+ || T <- get_all_tracked_connection_table_names_for_node(node())],
+ ok.
+
+-spec delete_tracked_connections_table_for_node(node()) -> ok.
+
+delete_tracked_connections_table_for_node(Node) ->
+ TableName = tracked_connection_table_name_for(Node),
+ rabbit_tracking:delete_tracking_table(TableName, Node, "tracked connection").
+
+-spec delete_per_vhost_tracked_connections_table_for_node(node()) -> ok.
+
+delete_per_vhost_tracked_connections_table_for_node(Node) ->
+ TableName = tracked_connection_per_vhost_table_name_for(Node),
+ rabbit_tracking:delete_tracking_table(TableName, Node,
+ "per-vhost tracked connection").
+
+-spec delete_per_user_tracked_connections_table_for_node(node()) -> ok.
+
+delete_per_user_tracked_connections_table_for_node(Node) ->
+ TableName = tracked_connection_per_user_table_name_for(Node),
+ rabbit_tracking:delete_tracking_table(TableName, Node,
+ "per-user tracked connection").
+
+%% Table names are node-specific atoms; created once per node, so atom
+%% table growth is bounded by cluster size.
+-spec tracked_connection_table_name_for(node()) -> atom().
+
+tracked_connection_table_name_for(Node) ->
+ list_to_atom(rabbit_misc:format("tracked_connection_on_node_~s", [Node])).
+
+-spec tracked_connection_per_vhost_table_name_for(node()) -> atom().
+
+tracked_connection_per_vhost_table_name_for(Node) ->
+ list_to_atom(rabbit_misc:format("tracked_connection_per_vhost_on_node_~s", [Node])).
+
+-spec tracked_connection_per_user_table_name_for(node()) -> atom().
+
+tracked_connection_per_user_table_name_for(Node) ->
+ list_to_atom(rabbit_misc:format(
+ "tracked_connection_table_per_user_on_node_~s", [Node])).
+
+-spec get_all_tracked_connection_table_names_for_node(node()) -> [atom()].
+
+%% The three tracking tables owned by the given node, in creation order.
+get_all_tracked_connection_table_names_for_node(Node) ->
+ [tracked_connection_table_name_for(Node),
+ tracked_connection_per_vhost_table_name_for(Node),
+ tracked_connection_per_user_table_name_for(Node)].
+
+-spec lookup(rabbit_types:connection_name()) -> rabbit_types:tracked_connection() | 'not_found'.
+
+%% Finds a tracked connection by name, searching every running node's
+%% table in turn; returns the first match or 'not_found'.
+lookup(Name) ->
+ Nodes = rabbit_nodes:all_running(),
+ lookup(Name, Nodes).
+
+lookup(_, []) ->
+ not_found;
+lookup(Name, [Node | Nodes]) ->
+ TableName = tracked_connection_table_name_for(Node),
+ case mnesia:dirty_read(TableName, {Node, Name}) of
+ [] -> lookup(Name, Nodes),
+ [Row] -> Row
+ end.
+
+-spec list() -> [rabbit_types:tracked_connection()].
+
+%% All tracked connections cluster-wide (dirty reads: eventually
+%% consistent snapshot).
+%% NOTE(review): Acc ++ per node is quadratic in node count; fine for
+%% realistic cluster sizes.
+list() ->
+ lists:foldl(
+ fun (Node, Acc) ->
+ Tab = tracked_connection_table_name_for(Node),
+ Acc ++ mnesia:dirty_match_object(Tab, #tracked_connection{_ = '_'})
+ end, [], rabbit_nodes:all_running()).
+
+-spec count() -> non_neg_integer().
+
+%% Total number of tracked connections cluster-wide, summed from
+%% per-node table sizes (cheaper than listing all rows).
+count() ->
+ lists:foldl(
+ fun (Node, Acc) ->
+ Tab = tracked_connection_table_name_for(Node),
+ Acc + mnesia:table_info(Tab, size)
+ end, 0, rabbit_nodes:all_running()).
+
+-spec list(rabbit_types:vhost()) -> [rabbit_types:tracked_connection()].
+
+%% All tracked connections in the given vhost, cluster-wide.
+list(VHost) ->
+ rabbit_tracking:match_tracked_items(
+ fun tracked_connection_table_name_for/1,
+ #tracked_connection{vhost = VHost, _ = '_'}).
+
+-spec list_on_node(node()) -> [rabbit_types:tracked_connection()].
+
+%% All tracked connections on one node; returns [] if the node's table
+%% does not exist (e.g. the node never booted tracking).
+list_on_node(Node) ->
+ try mnesia:dirty_match_object(
+ tracked_connection_table_name_for(Node),
+ #tracked_connection{_ = '_'})
+ catch exit:{aborted, {no_exists, _}} -> []
+ end.
+
+-spec list_on_node(node(), rabbit_types:vhost()) -> [rabbit_types:tracked_connection()].
+
+%% Tracked connections on one node restricted to one vhost; [] if the
+%% node's table does not exist.
+list_on_node(Node, VHost) ->
+ try mnesia:dirty_match_object(
+ tracked_connection_table_name_for(Node),
+ #tracked_connection{vhost = VHost, _ = '_'})
+ catch exit:{aborted, {no_exists, _}} -> []
+ end.
+
+
+-spec list_of_user(rabbit_types:username()) -> [rabbit_types:tracked_connection()].
+
+%% All tracked connections belonging to the given user, cluster-wide.
+list_of_user(Username) ->
+ rabbit_tracking:match_tracked_items(
+ fun tracked_connection_table_name_for/1,
+ #tracked_connection{username = Username, _ = '_'}).
+
+%% Internal, delete tracked entries. Both functions delegate to
+%% rabbit_tracking:delete_tracked_entry/3, passing an existence-check
+%% MFA so the counter row is only removed if the vhost/user is really
+%% gone (these run on a timer after vhost_deleted/user_deleted).
+
+delete_tracked_connection_vhost_entry(Vhost) ->
+ rabbit_tracking:delete_tracked_entry(
+ {rabbit_vhost, exists, [Vhost]},
+ fun tracked_connection_per_vhost_table_name_for/1,
+ Vhost).
+
+delete_tracked_connection_user_entry(Username) ->
+ rabbit_tracking:delete_tracked_entry(
+ {rabbit_auth_backend_internal, exists, [Username]},
+ fun tracked_connection_per_user_table_name_for/1,
+ Username).
+
+%% Returns a #tracked_connection from connection_created
+%% event details (a proplist). Fields not listed below are left at
+%% their record defaults.
+%%
+%% @see rabbit_connection_tracking_handler.
+tracked_connection_from_connection_created(EventDetails) ->
+ %% Example event:
+ %%
+ %% [{type,network},
+ %% {pid,<0.329.0>},
+ %% {name,<<"127.0.0.1:60998 -> 127.0.0.1:5672">>},
+ %% {port,5672},
+ %% {peer_port,60998},
+ %% {host,{0,0,0,0,0,65535,32512,1}},
+ %% {peer_host,{0,0,0,0,0,65535,32512,1}},
+ %% {ssl,false},
+ %% {peer_cert_subject,''},
+ %% {peer_cert_issuer,''},
+ %% {peer_cert_validity,''},
+ %% {auth_mechanism,<<"PLAIN">>},
+ %% {ssl_protocol,''},
+ %% {ssl_key_exchange,''},
+ %% {ssl_cipher,''},
+ %% {ssl_hash,''},
+ %% {protocol,{0,9,1}},
+ %% {user,<<"guest">>},
+ %% {vhost,<<"/">>},
+ %% {timeout,14},
+ %% {frame_max,131072},
+ %% {channel_max,65535},
+ %% {client_properties,
+ %% [{<<"capabilities">>,table,
+ %% [{<<"publisher_confirms">>,bool,true},
+ %% {<<"consumer_cancel_notify">>,bool,true},
+ %% {<<"exchange_exchange_bindings">>,bool,true},
+ %% {<<"basic.nack">>,bool,true},
+ %% {<<"connection.blocked">>,bool,true},
+ %% {<<"authentication_failure_close">>,bool,true}]},
+ %% {<<"product">>,longstr,<<"Bunny">>},
+ %% {<<"platform">>,longstr,
+ %% <<"ruby 2.3.0p0 (2015-12-25 revision 53290) [x86_64-darwin15]">>},
+ %% {<<"version">>,longstr,<<"2.3.0.pre">>},
+ %% {<<"information">>,longstr,
+ %% <<"http://rubybunny.info">>}]},
+ %% {connected_at,1453214290847}]
+ Name = pget(name, EventDetails),
+ Node = pget(node, EventDetails),
+ #tracked_connection{id = rabbit_tracking:id(Node, Name),
+ name = Name,
+ node = Node,
+ vhost = pget(vhost, EventDetails),
+ username = pget(user, EventDetails),
+ connected_at = pget(connected_at, EventDetails),
+ pid = pget(pid, EventDetails),
+ type = pget(type, EventDetails),
+ peer_host = pget(peer_host, EventDetails),
+ peer_port = pget(peer_port, EventDetails)}.
+
+%% Builds a #tracked_connection for the calling process from a
+%% #connection{} reader state, by synthesising a connection_created-style
+%% proplist (type is always 'network' here; node and pid are the
+%% caller's own).
+tracked_connection_from_connection_state(#connection{
+ vhost = VHost,
+ connected_at = Ts,
+ peer_host = PeerHost,
+ peer_port = PeerPort,
+ user = Username,
+ name = Name
+ }) ->
+ tracked_connection_from_connection_created(
+ [{name, Name},
+ {node, node()},
+ {vhost, VHost},
+ {user, Username},
+ {user_who_performed_action, Username},
+ {connected_at, Ts},
+ {pid, self()},
+ {type, network},
+ {peer_port, PeerPort},
+ {peer_host, PeerHost}]).
+
+%% Closes each tracked connection, with no pause between closures.
+close_connections(Tracked, Message) ->
+ close_connections(Tracked, Message, 0).
+
+%% Closes each tracked connection, sleeping Delay ms after each one
+%% (used to throttle mass disconnections).
+close_connections(Tracked, Message, Delay) ->
+ [begin
+ close_connection(Conn, Message),
+ timer:sleep(Delay)
+ end || Conn <- Tracked],
+ ok.
+
+%% Closes one connection. Network connections are closed via the local
+%% rabbit_networking API; failures are logged but never propagated,
+%% since closing is best-effort.
+close_connection(#tracked_connection{pid = Pid, type = network}, Message) ->
+ try
+ rabbit_networking:close_connection(Pid, Message)
+ catch error:{not_a_connection, _} ->
+ %% could have been closed concurrently, or the input
+ %% is bogus. In any case, we should not terminate
+ ok;
+ _:Err ->
+ %% ignore, don't terminate
+ rabbit_log:warning("Could not close connection ~p: ~p", [Pid, Err]),
+ ok
+ end;
+close_connection(#tracked_connection{pid = Pid, type = direct}, Message) ->
+ %% Do an RPC call to the node running the direct client.
+ %% 320 is the AMQP 0-9-1 connection-forced reply code.
+ Node = node(Pid),
+ rpc:call(Node, amqp_direct_connection, server_close, [Pid, 320, Message]).
diff --git a/deps/rabbit/src/rabbit_connection_tracking_handler.erl b/deps/rabbit/src/rabbit_connection_tracking_handler.erl
new file mode 100644
index 0000000000..17085d805a
--- /dev/null
+++ b/deps/rabbit/src/rabbit_connection_tracking_handler.erl
@@ -0,0 +1,80 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_connection_tracking_handler).
+
+%% This module keeps track of connection creation and termination events
+%% on its local node. The primary goal here is to decouple connection
+%% tracking from rabbit_reader in rabbit_common.
+%%
+%% Events from other nodes are ignored.
+%%
+%% The handler itself is stateless: every interesting rabbit_event is
+%% forwarded to rabbit_connection_tracking:update_tracked/1, which
+%% processes it asynchronously.
+
+-behaviour(gen_event).
+
+-export([init/1, handle_call/2, handle_event/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+%% for compatibility with previous versions of CLI tools
+-export([close_connections/3]).
+
+%% NOTE(review): sibling modules use -include("rabbit.hrl"); the
+%% -include_lib form here with a bare filename is inconsistent - confirm
+%% which is intended.
+-include_lib("rabbit.hrl").
+
+%% Registered as a rabbit_event handler at boot (after the
+%% connection_tracking step), removed again on cleanup.
+-rabbit_boot_step({?MODULE,
+ [{description, "connection tracking event handler"},
+ {mfa, {gen_event, add_handler,
+ [rabbit_event, ?MODULE, []]}},
+ {cleanup, {gen_event, delete_handler,
+ [rabbit_event, ?MODULE, []]}},
+ {requires, [connection_tracking]},
+ {enables, recovery}]}).
+
+%%
+%% API
+%%
+
+init([]) ->
+ {ok, []}.
+
+%% Forward each tracked event type; everything else falls through to
+%% the final catch-all clause.
+handle_event(#event{type = connection_created, props = Details}, State) ->
+ ok = rabbit_connection_tracking:update_tracked({connection_created, Details}),
+ {ok, State};
+handle_event(#event{type = connection_closed, props = Details}, State) ->
+ ok = rabbit_connection_tracking:update_tracked({connection_closed, Details}),
+ {ok, State};
+handle_event(#event{type = vhost_deleted, props = Details}, State) ->
+ ok = rabbit_connection_tracking:update_tracked({vhost_deleted, Details}),
+ {ok, State};
+%% Note: under normal circumstances this will be called immediately
+%% after the vhost_deleted above. Therefore we should be careful about
+%% what we log and be more defensive.
+handle_event(#event{type = vhost_down, props = Details}, State) ->
+ ok = rabbit_connection_tracking:update_tracked({vhost_down, Details}),
+ {ok, State};
+handle_event(#event{type = user_deleted, props = Details}, State) ->
+ ok = rabbit_connection_tracking:update_tracked({user_deleted, Details}),
+ {ok, State};
+%% A node has been deleted from the cluster.
+handle_event(#event{type = node_deleted, props = Details}, State) ->
+ ok = rabbit_connection_tracking:update_tracked({node_deleted, Details}),
+ {ok, State};
+handle_event(_Event, State) ->
+ {ok, State}.
+
+handle_call(_Request, State) ->
+ {ok, not_understood, State}.
+
+handle_info(_Info, State) ->
+ {ok, State}.
+
+terminate(_Arg, _State) ->
+ ok.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%% Thin delegate kept for older CLI tools that call this module directly.
+close_connections(Tracked, Message, Delay) ->
+ rabbit_connection_tracking:close_connections(Tracked, Message, Delay).
diff --git a/deps/rabbit/src/rabbit_control_pbe.erl b/deps/rabbit/src/rabbit_control_pbe.erl
new file mode 100644
index 0000000000..95c4fe41f1
--- /dev/null
+++ b/deps/rabbit/src/rabbit_control_pbe.erl
@@ -0,0 +1,82 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% CLI backend for password-based encryption (PBE) of configuration
+%% values: encode/decode an Erlang term with a passphrase via rabbit_pbe.
+%% All results are returned as {ok, IoList} | {error, IoList} for the
+%% CLI to print.
+-module(rabbit_control_pbe).
+
+-export([decode/4, encode/4, list_ciphers/0, list_hashes/0]).
+
+% for testing purposes
+-export([evaluate_input_as_term/1]).
+
+list_ciphers() ->
+ {ok, io_lib:format("~p", [rabbit_pbe:supported_ciphers()])}.
+
+list_hashes() ->
+ {ok, io_lib:format("~p", [rabbit_pbe:supported_hashes()])}.
+
+%% Validates the cipher/hash/iteration-count/argument combination shared
+%% by encode/4 and decode/4. Returns ok or {error, Message}.
+%% NOTE(review): io_lib:format(Msg, []) on a literal with no ~-directives
+%% is a no-op formatting call; a plain string would do.
+validate(_Cipher, _Hash, Iterations, _Args) when Iterations =< 0 ->
+ {error, io_lib:format("The requested number of iterations is incorrect", [])};
+validate(_Cipher, _Hash, _Iterations, Args) when length(Args) < 2 ->
+ {error, io_lib:format("Please provide a value to encode/decode and a passphrase", [])};
+validate(_Cipher, _Hash, _Iterations, Args) when length(Args) > 2 ->
+ {error, io_lib:format("Too many arguments. Please provide a value to encode/decode and a passphrase", [])};
+validate(Cipher, Hash, _Iterations, _Args) ->
+ case lists:member(Cipher, rabbit_pbe:supported_ciphers()) of
+ false ->
+ {error, io_lib:format("The requested cipher is not supported", [])};
+ true ->
+ case lists:member(Hash, rabbit_pbe:supported_hashes()) of
+ false ->
+ {error, io_lib:format("The requested hash is not supported", [])};
+ true -> ok
+ end
+ end.
+
+%% Encrypts a term (parsed from the first argument) with the passphrase
+%% (second argument). Any exception during parsing/encryption is turned
+%% into an {error, Message} result rather than crashing the CLI.
+encode(Cipher, Hash, Iterations, Args) ->
+ case validate(Cipher, Hash, Iterations, Args) of
+ {error, Err} -> {error, Err};
+ ok ->
+ [Value, PassPhrase] = Args,
+ try begin
+ TermValue = evaluate_input_as_term(Value),
+ Result = {encrypted, _} = rabbit_pbe:encrypt_term(Cipher, Hash, Iterations,
+ list_to_binary(PassPhrase), TermValue),
+ {ok, io_lib:format("~p", [Result])}
+ end
+ catch
+ _:Msg -> {error, io_lib:format("Error during cipher operation: ~p", [Msg])}
+ end
+ end.
+
+%% Decrypts a term. Accepts either the full {encrypted, _} tuple or the
+%% bare payload, wrapping the latter before decryption.
+decode(Cipher, Hash, Iterations, Args) ->
+ case validate(Cipher, Hash, Iterations, Args) of
+ {error, Err} -> {error, Err};
+ ok ->
+ [Value, PassPhrase] = Args,
+ try begin
+ TermValue = evaluate_input_as_term(Value),
+ TermToDecrypt = case TermValue of
+ {encrypted, _}=EncryptedTerm ->
+ EncryptedTerm;
+ _ ->
+ {encrypted, TermValue}
+ end,
+ Result = rabbit_pbe:decrypt_term(Cipher, Hash, Iterations,
+ list_to_binary(PassPhrase),
+ TermToDecrypt),
+ {ok, io_lib:format("~p", [Result])}
+ end
+ catch
+ _:Msg -> {error, io_lib:format("Error during cipher operation: ~p", [Msg])}
+ end
+ end.
+
+%% Parses Input (a string) as a single Erlang expression and evaluates
+%% it to a term.
+%% SECURITY NOTE: erl_eval executes arbitrary expressions, including
+%% function calls. This is acceptable only because input comes from the
+%% operator's own CLI invocation - never feed untrusted data here.
+evaluate_input_as_term(Input) ->
+ {ok,Tokens,_EndLine} = erl_scan:string(Input ++ "."),
+ {ok,AbsForm} = erl_parse:parse_exprs(Tokens),
+ {value,TermValue,_Bs} = erl_eval:exprs(AbsForm, erl_eval:new_bindings()),
+ TermValue.
diff --git a/deps/rabbit/src/rabbit_core_ff.erl b/deps/rabbit/src/rabbit_core_ff.erl
new file mode 100644
index 0000000000..6d30846775
--- /dev/null
+++ b/deps/rabbit/src/rabbit_core_ff.erl
@@ -0,0 +1,179 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_core_ff).
+
+-export([quorum_queue_migration/3,
+ stream_queue_migration/3,
+ implicit_default_bindings_migration/3,
+ virtual_host_metadata_migration/3,
+ maintenance_mode_status_migration/3,
+ user_limits_migration/3]).
+
+-rabbit_feature_flag(
+ {quorum_queue,
+ #{desc => "Support queues of type `quorum`",
+ doc_url => "https://www.rabbitmq.com/quorum-queues.html",
+ stability => stable,
+ migration_fun => {?MODULE, quorum_queue_migration}
+ }}).
+
+-rabbit_feature_flag(
+ {stream_queue,
+ #{desc => "Support queues of type `stream`",
+ doc_url => "https://www.rabbitmq.com/stream-queues.html",
+ stability => stable,
+ depends_on => [quorum_queue],
+ migration_fun => {?MODULE, stream_queue_migration}
+ }}).
+
+-rabbit_feature_flag(
+ {implicit_default_bindings,
+ #{desc => "Default bindings are now implicit, instead of "
+ "being stored in the database",
+ stability => stable,
+ migration_fun => {?MODULE, implicit_default_bindings_migration}
+ }}).
+
+-rabbit_feature_flag(
+ {virtual_host_metadata,
+ #{desc => "Virtual host metadata (description, tags, etc)",
+ stability => stable,
+ migration_fun => {?MODULE, virtual_host_metadata_migration}
+ }}).
+
+-rabbit_feature_flag(
+ {maintenance_mode_status,
+ #{desc => "Maintenance mode status",
+ stability => stable,
+ migration_fun => {?MODULE, maintenance_mode_status_migration}
+ }}).
+
+-rabbit_feature_flag(
+ {user_limits,
+ #{desc => "Configure connection and channel limits for a user",
+ stability => stable,
+ migration_fun => {?MODULE, user_limits_migration}
+ }}).
+
+%% -------------------------------------------------------------------
+%% Quorum queues.
+%% -------------------------------------------------------------------
+
+-define(quorum_queue_tables, [rabbit_queue,
+ rabbit_durable_queue]).
+
+%% Migration callback for the quorum_queue feature flag.
+%% enable: transform both queue tables to the amqqueue_v2 record layout.
+%% is_enabled: the flag is on iff both tables already have the v2
+%% attribute list.
+quorum_queue_migration(FeatureName, _FeatureProps, enable) ->
+ Tables = ?quorum_queue_tables,
+ rabbit_table:wait(Tables, _Retry = true),
+ Fields = amqqueue:fields(amqqueue_v2),
+ migrate_to_amqqueue_with_type(FeatureName, Tables, Fields);
+quorum_queue_migration(_FeatureName, _FeatureProps, is_enabled) ->
+ Tables = ?quorum_queue_tables,
+ rabbit_table:wait(Tables, _Retry = true),
+ Fields = amqqueue:fields(amqqueue_v2),
+ mnesia:table_info(rabbit_queue, attributes) =:= Fields andalso
+ mnesia:table_info(rabbit_durable_queue, attributes) =:= Fields.
+
+%% stream_queue requires no table changes of its own (it depends on
+%% quorum_queue, which already migrated the tables).
+stream_queue_migration(_FeatureName, _FeatureProps, _Enable) ->
+ ok.
+
+%% Transforms each listed Mnesia table to the amqqueue_v2 record,
+%% stopping at the first failure and returning {error, Reason}.
+migrate_to_amqqueue_with_type(FeatureName, [Table | Rest], Fields) ->
+ rabbit_log_feature_flags:info(
+ "Feature flag `~s`: migrating Mnesia table ~s...",
+ [FeatureName, Table]),
+ Fun = fun(Queue) -> amqqueue:upgrade_to(amqqueue_v2, Queue) end,
+ case mnesia:transform_table(Table, Fun, Fields) of
+ {atomic, ok} -> migrate_to_amqqueue_with_type(FeatureName,
+ Rest,
+ Fields);
+ {aborted, Reason} -> {error, Reason}
+ end;
+migrate_to_amqqueue_with_type(FeatureName, [], _) ->
+ rabbit_log_feature_flags:info(
+ "Feature flag `~s`: Mnesia tables migration done",
+ [FeatureName]),
+ ok.
+
+%% -------------------------------------------------------------------
+%% Default bindings.
+%% -------------------------------------------------------------------
+
+%% Migration callback for the implicit_default_bindings feature flag.
+%% is_enabled returns 'undefined' because enablement cannot be detected
+%% from table state alone.
+implicit_default_bindings_migration(FeatureName, _FeatureProps,
+ enable) ->
+ %% Default exchange bindings are now implicit (not stored in the
+ %% route tables). It should be safe to remove them outside of a
+ %% transaction.
+ rabbit_table:wait([rabbit_queue]),
+ Queues = mnesia:dirty_all_keys(rabbit_queue),
+ remove_explicit_default_bindings(FeatureName, Queues);
+implicit_default_bindings_migration(_Feature_Name, _FeatureProps,
+ is_enabled) ->
+ undefined.
+
+%% Deletes the stored default-exchange binding rows for every queue;
+%% no-op (and no log line) when there are no queues.
+remove_explicit_default_bindings(_FeatureName, []) ->
+ ok;
+remove_explicit_default_bindings(FeatureName, Queues) ->
+ rabbit_log_feature_flags:info(
+ "Feature flag `~s`: deleting explicit default bindings "
+ "for ~b queues (it may take some time)...",
+ [FeatureName, length(Queues)]),
+ [rabbit_binding:remove_default_exchange_binding_rows_of(Q)
+ || Q <- Queues],
+ ok.
+
+%% -------------------------------------------------------------------
+%% Virtual host metadata.
+%% -------------------------------------------------------------------
+
+%% Migration callback for the virtual_host_metadata feature flag.
+%% enable: transform rabbit_vhost rows to the vhost_v2 record.
+%% is_enabled: compare the table's attributes with the v2 field list.
+virtual_host_metadata_migration(_FeatureName, _FeatureProps, enable) ->
+ Tab = rabbit_vhost,
+ rabbit_table:wait([Tab], _Retry = true),
+ Fun = fun(Row) -> vhost:upgrade_to(vhost_v2, Row) end,
+ case mnesia:transform_table(Tab, Fun, vhost:fields(vhost_v2)) of
+ {atomic, ok} -> ok;
+ {aborted, Reason} -> {error, Reason}
+ end;
+virtual_host_metadata_migration(_FeatureName, _FeatureProps, is_enabled) ->
+ mnesia:table_info(rabbit_vhost, attributes) =:= vhost:fields(vhost_v2).
+
+%% -------------------------------------------------------------------
+%% Maintenance mode.
+%% -------------------------------------------------------------------
+
+%% Migration callback for the maintenance_mode_status feature flag.
+%% enable: create the maintenance status table and ensure this node has
+%% a copy; table-creation failures are logged, not propagated
+%% (rabbit_table:create throws on error).
+%% is_enabled: the flag is on iff the table exists.
+maintenance_mode_status_migration(FeatureName, _FeatureProps, enable) ->
+ TableName = rabbit_maintenance:status_table_name(),
+ rabbit_log:info(
+ "Creating table ~s for feature flag `~s`",
+ [TableName, FeatureName]),
+ try
+ _ = rabbit_table:create(
+ TableName,
+ rabbit_maintenance:status_table_definition()),
+ _ = rabbit_table:ensure_table_copy(TableName, node())
+ catch throw:Reason ->
+ rabbit_log:error(
+ "Failed to create maintenance status table: ~p",
+ [Reason])
+ end;
+maintenance_mode_status_migration(_FeatureName, _FeatureProps, is_enabled) ->
+ rabbit_table:exists(rabbit_maintenance:status_table_name()).
+
+%% -------------------------------------------------------------------
+%% User limits.
+%% -------------------------------------------------------------------
+
+%% Migration callback for the user_limits feature flag.
+%% enable: transform rabbit_user rows to the internal_user_v2 record.
+%% is_enabled: compare the table's attributes with the v2 field list.
+user_limits_migration(_FeatureName, _FeatureProps, enable) ->
+ Tab = rabbit_user,
+ rabbit_table:wait([Tab], _Retry = true),
+ Fun = fun(Row) -> internal_user:upgrade_to(internal_user_v2, Row) end,
+ case mnesia:transform_table(Tab, Fun, internal_user:fields(internal_user_v2)) of
+ {atomic, ok} -> ok;
+ {aborted, Reason} -> {error, Reason}
+ end;
+user_limits_migration(_FeatureName, _FeatureProps, is_enabled) ->
+ mnesia:table_info(rabbit_user, attributes) =:= internal_user:fields(internal_user_v2).
diff --git a/deps/rabbit/src/rabbit_core_metrics_gc.erl b/deps/rabbit/src/rabbit_core_metrics_gc.erl
new file mode 100644
index 0000000000..890c127586
--- /dev/null
+++ b/deps/rabbit/src/rabbit_core_metrics_gc.erl
@@ -0,0 +1,199 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+%% Periodic garbage collector for core metrics ETS tables: removes rows
+%% referring to connections, channels, queues, exchanges and nodes that
+%% no longer exist, so stale entities do not linger in metrics output.
+%% Implements the gen_server callbacks. NOTE(review): no
+%% -behaviour(gen_server) attribute is declared — consider adding one.
+-module(rabbit_core_metrics_gc).
+
+%% timer: reference of the currently armed GC timer
+%% interval: GC period in milliseconds
+-record(state, {timer,
+ interval
+ }).
+
+-export([start_link/0]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+start_link() ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+%% The sweep interval is configurable via
+%% rabbit.core_metrics_gc_interval (default: 120000 ms).
+init(_) ->
+ Interval = rabbit_misc:get_env(rabbit, core_metrics_gc_interval, 120000),
+ {ok, start_timer(#state{interval = Interval})}.
+
+%% `test' is only used by test suites to synchronise with this server.
+handle_call(test, _From, State) ->
+ {reply, ok, State}.
+
+handle_cast(_Request, State) ->
+ {noreply, State}.
+
+%% Fired by the timer armed in start_timer/1: sweep every metric family,
+%% then re-arm the timer for the next round.
+handle_info(start_gc, State) ->
+ gc_connections(),
+ gc_channels(),
+ gc_queues(),
+ gc_exchanges(),
+ gc_nodes(),
+ gc_gen_server2(),
+ gc_auth_attempts(),
+ {noreply, start_timer(State)}.
+
+terminate(_Reason, #state{timer = TRef}) ->
+ erlang:cancel_timer(TRef),
+ ok.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%% Arms the GC timer and records its reference for cancellation in
+%% terminate/2.
+start_timer(#state{interval = Interval} = St) ->
+ TRef = erlang:send_after(Interval, self(), start_gc),
+ St#state{timer = TRef}.
+
+%% Drop rows keyed by connection pids that are no longer alive.
+gc_connections() ->
+ gc_process(connection_created),
+ gc_process(connection_metrics),
+ gc_process(connection_coarse_metrics).
+
+%% Drop rows keyed by channel pids that are no longer alive.
+gc_channels() ->
+ gc_process(channel_created),
+ gc_process(channel_metrics),
+ gc_process(channel_process_metrics),
+ ok.
+
+gc_queues() ->
+ gc_local_queues(),
+ gc_global_queues().
+
+%% Sweep metrics for queues hosted on this node; rows for queues this
+%% node only follows (not leads) are removed from the leader-only table.
+gc_local_queues() ->
+ Queues = rabbit_amqqueue:list_local_names(),
+ QueuesDown = rabbit_amqqueue:list_local_names_down(),
+ GbSet = gb_sets:from_list(Queues),
+ GbSetDown = gb_sets:from_list(QueuesDown),
+ gc_queue_metrics(GbSet, GbSetDown),
+ gc_entity(queue_coarse_metrics, GbSet),
+ Followers = gb_sets:from_list([amqqueue:get_name(Q) || Q <- rabbit_amqqueue:list_local_followers() ]),
+ gc_leader_data(Followers).
+
+%% Remove queue_coarse_metrics rows for queues this node follows
+%% (leader data must only be reported by the leader node).
+gc_leader_data(Followers) ->
+ ets:foldl(fun({Id, _, _, _, _}, none) ->
+ gc_leader_data(Id, queue_coarse_metrics, Followers)
+ end, none, queue_coarse_metrics).
+
+gc_leader_data(Id, Table, GbSet) ->
+ case gb_sets:is_member(Id, GbSet) of
+ true ->
+ ets:delete(Table, Id),
+ none;
+ false ->
+ none
+ end.
+
+%% Sweep channel-side metrics against the cluster-wide queue/exchange
+%% name sets.
+gc_global_queues() ->
+ GbSet = gb_sets:from_list(rabbit_amqqueue:list_names()),
+ gc_process_and_entity(channel_queue_metrics, GbSet),
+ gc_process_and_entity(consumer_created, GbSet),
+ ExchangeGbSet = gb_sets:from_list(rabbit_exchange:list_names()),
+ gc_process_and_entities(channel_queue_exchange_metrics, GbSet, ExchangeGbSet).
+
+gc_exchanges() ->
+ Exchanges = rabbit_exchange:list_names(),
+ GbSet = gb_sets:from_list(Exchanges),
+ gc_process_and_entity(channel_exchange_metrics, GbSet).
+
+%% Drop node-to-node metrics for nodes no longer in the cluster.
+gc_nodes() ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ GbSet = gb_sets:from_list(Nodes),
+ gc_entity(node_node_metrics, GbSet).
+
+gc_gen_server2() ->
+ gc_process(gen_server2_metrics).
+
+%% Walk a pid-keyed table; the three clauses cover the row arities used
+%% by the different metric tables.
+gc_process(Table) ->
+ ets:foldl(fun({Pid = Key, _}, none) ->
+ gc_process(Pid, Table, Key);
+ ({Pid = Key, _, _, _, _}, none) ->
+ gc_process(Pid, Table, Key);
+ ({Pid = Key, _, _, _}, none) ->
+ gc_process(Pid, Table, Key)
+ end, none, Table).
+
+%% Delete the row when its owning process is dead. Always returns `none'
+%% so it can serve as the ets:foldl/3 accumulator.
+gc_process(Pid, Table, Key) ->
+ case rabbit_misc:is_process_alive(Pid) of
+ true ->
+ none;
+ false ->
+ ets:delete(Table, Key),
+ none
+ end.
+
+%% Keep rows for known queues (marking down queues as such in-place),
+%% delete rows for queues that no longer exist.
+gc_queue_metrics(GbSet, GbSetDown) ->
+ Table = queue_metrics,
+ ets:foldl(fun({Key, Props, Marker}, none) ->
+ case gb_sets:is_member(Key, GbSet) of
+ true ->
+ case gb_sets:is_member(Key, GbSetDown) of
+ true ->
+ ets:insert(Table, {Key, [{state, down} | lists:keydelete(state, 1, Props)], Marker}),
+ none;
+ false ->
+ none
+ end;
+ false ->
+ ets:delete(Table, Key),
+ none
+ end
+ end, none, Table).
+
+%% Walk an entity-keyed table; clauses cover the key shapes and row
+%% arities of the tables this is applied to.
+gc_entity(Table, GbSet) ->
+ ets:foldl(fun({{_, Id} = Key, _}, none) ->
+ gc_entity(Id, Table, Key, GbSet);
+ ({Id = Key, _}, none) ->
+ gc_entity(Id, Table, Key, GbSet);
+ ({Id = Key, _, _}, none) ->
+ gc_entity(Id, Table, Key, GbSet);
+ ({Id = Key, _, _, _, _}, none) ->
+ gc_entity(Id, Table, Key, GbSet)
+ end, none, Table).
+
+%% Delete the row when the entity is not in the known set.
+gc_entity(Id, Table, Key, GbSet) ->
+ case gb_sets:is_member(Id, GbSet) of
+ true ->
+ none;
+ false ->
+ ets:delete(Table, Key),
+ none
+ end.
+
+%% Combined sweep: a row survives only if its process is alive AND its
+%% entity still exists. Clauses are guarded by table name because each
+%% table has a different row arity/key layout.
+gc_process_and_entity(Table, GbSet) ->
+ ets:foldl(fun({{Pid, Id} = Key, _, _, _, _, _, _, _, _}, none)
+ when Table == channel_queue_metrics ->
+ gc_process_and_entity(Id, Pid, Table, Key, GbSet);
+ ({{Pid, Id} = Key, _, _, _, _, _}, none)
+ when Table == channel_exchange_metrics ->
+ gc_process_and_entity(Id, Pid, Table, Key, GbSet);
+ ({{Id, Pid, _} = Key, _, _, _, _, _, _}, none)
+ when Table == consumer_created ->
+ gc_process_and_entity(Id, Pid, Table, Key, GbSet);
+ ({{{Pid, Id}, _} = Key, _, _, _, _}, none) ->
+ gc_process_and_entity(Id, Pid, Table, Key, GbSet)
+ end, none, Table).
+
+gc_process_and_entity(Id, Pid, Table, Key, GbSet) ->
+ case rabbit_misc:is_process_alive(Pid) andalso gb_sets:is_member(Id, GbSet) of
+ true ->
+ none;
+ false ->
+ ets:delete(Table, Key),
+ none
+ end.
+
+%% Sweep rows keyed by {Pid, {Queue, Exchange}}: drop the row if the
+%% channel is dead, the queue is gone, or the exchange is gone.
+gc_process_and_entities(Table, QueueGbSet, ExchangeGbSet) ->
+ ets:foldl(fun({{Pid, {Q, X}} = Key, _, _}, none) ->
+ gc_process(Pid, Table, Key),
+ gc_entity(Q, Table, Key, QueueGbSet),
+ gc_entity(X, Table, Key, ExchangeGbSet)
+ end, none, Table).
+
+%% Detailed auth-attempt metrics are cheap to rebuild: wipe them wholesale.
+gc_auth_attempts() ->
+ ets:delete_all_objects(auth_attempt_detailed_metrics).
diff --git a/deps/rabbit/src/rabbit_credential_validation.erl b/deps/rabbit/src/rabbit_credential_validation.erl
new file mode 100644
index 0000000000..8712628ade
--- /dev/null
+++ b/deps/rabbit/src/rabbit_credential_validation.erl
@@ -0,0 +1,44 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Facade over the configured `rabbit_credential_validator' backend.
+-module(rabbit_credential_validation).
+
+-include("rabbit.hrl").
+
+%% used for backwards compatibility
+-define(DEFAULT_BACKEND, rabbit_credential_validator_accept_everything).
+
+%%
+%% API
+%%
+
+-export([validate/2, backend/0]).
+
+%% Validates a username/password pair by delegating to the effective
+%% `rabbit_credential_validator`. Used by `rabbit_auth_backend_internal`.
+%% Note that some validators may choose to only validate passwords.
+%%
+%% Possible return values:
+%%
+%% * ok: provided credentials passed validation.
+%% * {error, Error, Args}: provided credentials failed validation.
+
+-spec validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}.
+
+validate(Username, Password) ->
+ Backend = backend(),
+ Backend:validate(Username, Password).
+
+-spec backend() -> atom().
+
+%% Returns the validator module configured under
+%% rabbit.credential_validator.validation_backend, falling back to the
+%% accept-everything backend when unset.
+backend() ->
+ case application:get_env(rabbit, credential_validator) of
+ undefined ->
+ ?DEFAULT_BACKEND;
+ {ok, Proplist} ->
+ proplists:get_value(validation_backend, Proplist, ?DEFAULT_BACKEND)
+ end.
diff --git a/deps/rabbit/src/rabbit_credential_validator.erl b/deps/rabbit/src/rabbit_credential_validator.erl
new file mode 100644
index 0000000000..3b5d0752bf
--- /dev/null
+++ b/deps/rabbit/src/rabbit_credential_validator.erl
@@ -0,0 +1,19 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Behaviour for pluggable credential (password) validators.
+-module(rabbit_credential_validator).
+
+-include("rabbit.hrl").
+
+%% Validates a password. Used by `rabbit_auth_backend_internal`.
+%%
+%% Possible return values:
+%%
+%% * ok: provided password passed validation.
+%% * {error, Error, Args}: provided password failed validation.
+
+-callback validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}.
diff --git a/deps/rabbit/src/rabbit_credential_validator_accept_everything.erl b/deps/rabbit/src/rabbit_credential_validator_accept_everything.erl
new file mode 100644
index 0000000000..fea10fd4b6
--- /dev/null
+++ b/deps/rabbit/src/rabbit_credential_validator_accept_everything.erl
@@ -0,0 +1,23 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% No-op `rabbit_credential_validator': accepts any credentials.
+%% This is the default backend (see rabbit_credential_validation).
+-module(rabbit_credential_validator_accept_everything).
+
+-include("rabbit.hrl").
+
+-behaviour(rabbit_credential_validator).
+
+%%
+%% API
+%%
+
+-export([validate/2]).
+
+-spec validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}.
+
+%% Unconditionally accepts the provided credentials.
+validate(_Username, _Password) ->
+ ok.
diff --git a/deps/rabbit/src/rabbit_credential_validator_min_password_length.erl b/deps/rabbit/src/rabbit_credential_validator_min_password_length.erl
new file mode 100644
index 0000000000..463090127f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_credential_validator_min_password_length.erl
@@ -0,0 +1,50 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% A `rabbit_credential_validator' implementation that requires passwords
+%% to be at least a configured number of bytes long
+%% (rabbit.credential_validator.min_length).
+-module(rabbit_credential_validator_min_password_length).
+
+-include("rabbit.hrl").
+
+-behaviour(rabbit_credential_validator).
+
+%% accommodates default (localhost-only) user credentials,
+%% guest/guest
+-define(DEFAULT_MIN_LENGTH, 5).
+
+%%
+%% API
+%%
+
+-export([validate/2]).
+%% for tests
+-export([validate/3]).
+
+-spec validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}.
+
+%% Reads the configured minimum length (default ?DEFAULT_MIN_LENGTH)
+%% and delegates to validate/3.
+validate(Username, Password) ->
+ MinLength = case application:get_env(rabbit, credential_validator) of
+ undefined ->
+ ?DEFAULT_MIN_LENGTH;
+ {ok, Proplist} ->
+ case proplists:get_value(min_length, Proplist) of
+ undefined -> ?DEFAULT_MIN_LENGTH;
+ Value -> rabbit_data_coercion:to_integer(Value)
+ end
+ end,
+ validate(Username, Password, MinLength).
+
+
+-spec validate(rabbit_types:username(), rabbit_types:password(), integer()) -> 'ok' | {'error', string(), [any()]}.
+
+%% passwordless users never satisfy a minimum length requirement
+validate(_Username, undefined, MinLength) ->
+ {error, rabbit_misc:format("minimum required password length is ~B", [MinLength])};
+validate(_Username, Password, MinLength) ->
+ %% byte_size/1 instead of the ambiguous size/1: passwords are binaries
+ %% and byte_size/1 states that expectation explicitly.
+ case byte_size(Password) >= MinLength of
+ true -> ok;
+ false -> {error, rabbit_misc:format("minimum required password length is ~B", [MinLength])}
+ end.
diff --git a/deps/rabbit/src/rabbit_credential_validator_password_regexp.erl b/deps/rabbit/src/rabbit_credential_validator_password_regexp.erl
new file mode 100644
index 0000000000..dc64cf1d31
--- /dev/null
+++ b/deps/rabbit/src/rabbit_credential_validator_password_regexp.erl
@@ -0,0 +1,42 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+
+%% A `rabbit_credential_validator` implementation that matches
+%% password against a pre-configured regular expression.
+%% A `rabbit_credential_validator` implementation that matches
+%% password against a pre-configured regular expression.
+-module(rabbit_credential_validator_password_regexp).
+
+-include("rabbit.hrl").
+
+-behaviour(rabbit_credential_validator).
+
+%%
+%% API
+%%
+
+-export([validate/2]).
+%% for tests
+-export([validate/3]).
+
+-spec validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}.
+
+%% Validates the password against rabbit.credential_validator.regexp.
+%% Crashes (badmatch) if this module is configured as the backend but the
+%% credential_validator app env is missing entirely.
+validate(Username, Password) ->
+ {ok, Proplist} = application:get_env(rabbit, credential_validator),
+ case proplists:get_value(regexp, Proplist) of
+ undefined ->
+ %% Bug fix: previously the error tuple was bound to the Regexp
+ %% variable and then handed to re:run/2 as the pattern, crashing
+ %% with badarg instead of returning a usable error to the caller.
+ {error, "rabbit.credential_validator.regexp config key is undefined"};
+ Value ->
+ validate(Username, Password, rabbit_data_coercion:to_list(Value))
+ end.
+
+
+-spec validate(rabbit_types:username(), rabbit_types:password(), string()) -> 'ok' | {'error', string(), [any()]}.
+
+%% Matches Password (coerced to a list) against Pattern.
+validate(_Username, Password, Pattern) ->
+ case re:run(rabbit_data_coercion:to_list(Password), Pattern) of
+ {match, _} -> ok;
+ nomatch -> {error, "provided password does not match the validator regular expression"}
+ end.
diff --git a/deps/rabbit/src/rabbit_dead_letter.erl b/deps/rabbit/src/rabbit_dead_letter.erl
new file mode 100644
index 0000000000..755de5cf53
--- /dev/null
+++ b/deps/rabbit/src/rabbit_dead_letter.erl
@@ -0,0 +1,253 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Dead-lettering: republishes a message to a dead-letter exchange (DLX),
+%% annotating it with x-death history headers and suppressing deliveries
+%% that would form a fully-automatic dead-letter cycle.
+-module(rabbit_dead_letter).
+
+-export([publish/5]).
+
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+
+%%----------------------------------------------------------------------------
+
+-type reason() :: 'expired' | 'rejected' | 'maxlen' | delivery_limit.
+
+%%----------------------------------------------------------------------------
+
+-spec publish(rabbit_types:message(), reason(), rabbit_types:exchange(),
+ 'undefined' | binary(), rabbit_amqqueue:name()) -> 'ok'.
+%% Builds the annotated dead-letter message, routes it through X, drops
+%% (and logs once) any routes that would cycle, and delivers statelessly
+%% to the remaining queues.
+publish(Msg, Reason, X, RK, QName) ->
+ DLMsg = make_msg(Msg, Reason, X#exchange.name, RK, QName),
+ Delivery = rabbit_basic:delivery(false, false, DLMsg, undefined),
+ {Queues, Cycles} = detect_cycles(Reason, DLMsg,
+ rabbit_exchange:route(X, Delivery)),
+ lists:foreach(fun log_cycle_once/1, Cycles),
+ _ = rabbit_queue_type:deliver(rabbit_amqqueue:lookup(Queues),
+ Delivery, stateless),
+ ok.
+
+%% Clones Msg for the DLX: new id, new routing keys (the configured
+%% dead-letter routing key if any, else the original keys), x-death
+%% headers updated, per-message TTL cleared (preserved in
+%% x-original-expiration so it can be restored).
+make_msg(Msg = #basic_message{content = Content,
+ exchange_name = Exchange,
+ routing_keys = RoutingKeys},
+ Reason, DLX, RK, #resource{name = QName}) ->
+ {DeathRoutingKeys, HeadersFun1} =
+ case RK of
+ undefined -> {RoutingKeys, fun (H) -> H end};
+ %% an explicit DL routing key overrides CC as well
+ _ -> {[RK], fun (H) -> lists:keydelete(<<"CC">>, 1, H) end}
+ end,
+ ReasonBin = list_to_binary(atom_to_list(Reason)),
+ TimeSec = os:system_time(seconds),
+ PerMsgTTL = per_msg_ttl_header(Content#content.properties),
+ HeadersFun2 =
+ fun (Headers) ->
+ %% The first routing key is the one specified in the
+ %% basic.publish; all others are CC or BCC keys.
+ RKs = [hd(RoutingKeys) | rabbit_basic:header_routes(Headers)],
+ RKs1 = [{longstr, Key} || Key <- RKs],
+ Info = [{<<"reason">>, longstr, ReasonBin},
+ {<<"queue">>, longstr, QName},
+ {<<"time">>, timestamp, TimeSec},
+ {<<"exchange">>, longstr, Exchange#resource.name},
+ {<<"routing-keys">>, array, RKs1}] ++ PerMsgTTL,
+ HeadersFun1(update_x_death_header(Info, Headers))
+ end,
+ Content1 = #content{properties = Props} =
+ rabbit_basic:map_headers(HeadersFun2, Content),
+ Content2 = Content1#content{properties =
+ Props#'P_basic'{expiration = undefined}},
+ Msg#basic_message{exchange_name = DLX,
+ id = rabbit_guid:gen(),
+ routing_keys = DeathRoutingKeys,
+ content = Content2}.
+
+
+%% Extracts the value of Key from an x-death info table, or `undefined'.
+x_death_event_key(Info, Key) ->
+ case lists:keysearch(Key, 1, Info) of
+ false -> undefined;
+ {value, {Key, _KeyType, Val}} -> Val
+ end.
+
+%% Accumulates one x-death table per unique {Queue, Reason} key.
+maybe_append_to_event_group(Table, _Key, _SeenKeys, []) ->
+ [Table];
+maybe_append_to_event_group(Table, {_Queue, _Reason} = Key, SeenKeys, Acc) ->
+ case sets:is_element(Key, SeenKeys) of
+ true -> Acc;
+ false -> [Table | Acc]
+ end.
+
+%% Collapses duplicate x-death entries (possible with headers written
+%% before rabbitmq-server#78) into one entry per {queue, reason},
+%% ensuring each carries a count.
+group_by_queue_and_reason([]) ->
+ [];
+group_by_queue_and_reason([Table]) ->
+ [Table];
+group_by_queue_and_reason(Tables) ->
+ {_, Grouped} =
+ lists:foldl(
+ fun ({table, Info}, {SeenKeys, Acc}) ->
+ Q = x_death_event_key(Info, <<"queue">>),
+ R = x_death_event_key(Info, <<"reason">>),
+ Matcher = queue_and_reason_matcher(Q, R),
+ {Matches, _} = lists:partition(Matcher, Tables),
+ {Augmented, N} = case Matches of
+ [X] -> {X, 1};
+ [X|_] = Xs -> {X, length(Xs)}
+ end,
+ Key = {Q, R},
+ Acc1 = maybe_append_to_event_group(
+ ensure_xdeath_event_count(Augmented, N),
+ Key, SeenKeys, Acc),
+ {sets:add_element(Key, SeenKeys), Acc1}
+ end, {sets:new(), []}, Tables),
+ Grouped.
+
+%% Prepends/updates the x-death entry for this death event; on the very
+%% first death also records x-first-death-* top-level headers.
+update_x_death_header(Info, undefined) ->
+ update_x_death_header(Info, []);
+update_x_death_header(Info, Headers) ->
+ X = x_death_event_key(Info, <<"exchange">>),
+ Q = x_death_event_key(Info, <<"queue">>),
+ R = x_death_event_key(Info, <<"reason">>),
+ case rabbit_basic:header(<<"x-death">>, Headers) of
+ undefined ->
+ %% First x-death event gets its own top-level headers.
+ %% See rabbitmq/rabbitmq-server#1332.
+ Headers2 = rabbit_misc:set_table_value(Headers, <<"x-first-death-reason">>,
+ longstr, R),
+ Headers3 = rabbit_misc:set_table_value(Headers2, <<"x-first-death-queue">>,
+ longstr, Q),
+ Headers4 = rabbit_misc:set_table_value(Headers3, <<"x-first-death-exchange">>,
+ longstr, X),
+ rabbit_basic:prepend_table_header(
+ <<"x-death">>,
+ [{<<"count">>, long, 1} | Info], Headers4);
+ {<<"x-death">>, array, Tables} ->
+ %% group existing x-death headers in case we have some from
+ %% before rabbitmq-server#78
+ GroupedTables = group_by_queue_and_reason(Tables),
+ {Matches, Others} = lists:partition(
+ queue_and_reason_matcher(Q, R),
+ GroupedTables),
+ Info1 = case Matches of
+ [] ->
+ [{<<"count">>, long, 1} | Info];
+ [{table, M}] ->
+ increment_xdeath_event_count(M)
+ end,
+ rabbit_misc:set_table_value(
+ Headers, <<"x-death">>, array,
+ [{table, rabbit_misc:sort_field_table(Info1)} | Others]);
+ {<<"x-death">>, InvalidType, Header} ->
+ rabbit_log:warning("Message has invalid x-death header (type: ~p)."
+ " Resetting header ~p~n",
+ [InvalidType, Header]),
+ %% if x-death is something other than an array (list)
+ %% then we reset it: this happens when some clients consume
+ %% a message and re-publish it, converting header values
+ %% to strings, intentionally or not.
+ %% See rabbitmq/rabbitmq-server#767 for details.
+ rabbit_misc:set_table_value(
+ Headers, <<"x-death">>, array,
+ [{table, [{<<"count">>, long, 1} | Info]}])
+ end.
+
+%% Adds a count field (InitialVal) to an x-death entry that lacks one.
+ensure_xdeath_event_count({table, Info}, InitialVal) when InitialVal >= 1 ->
+ {table, ensure_xdeath_event_count(Info, InitialVal)};
+ensure_xdeath_event_count(Info, InitialVal) when InitialVal >= 1 ->
+ case x_death_event_key(Info, <<"count">>) of
+ undefined ->
+ [{<<"count">>, long, InitialVal} | Info];
+ _ ->
+ Info
+ end.
+
+%% Increments (or initialises to 1) the count field of an x-death entry.
+increment_xdeath_event_count(Info) ->
+ case x_death_event_key(Info, <<"count">>) of
+ undefined ->
+ [{<<"count">>, long, 1} | Info];
+ N ->
+ lists:keyreplace(
+ <<"count">>, 1, Info,
+ {<<"count">>, long, N + 1})
+ end.
+
+%% Returns a predicate matching x-death entries (bare or {table, _})
+%% with the given queue and reason.
+queue_and_reason_matcher(Q, R) ->
+ F = fun(Info) ->
+ x_death_event_key(Info, <<"queue">>) =:= Q
+ andalso x_death_event_key(Info, <<"reason">>) =:= R
+ end,
+ fun({table, Info}) ->
+ F(Info);
+ (Info) when is_list(Info) ->
+ F(Info)
+ end.
+
+%% Preserve per-message TTL so it can be restored after dead-lettering.
+per_msg_ttl_header(#'P_basic'{expiration = undefined}) ->
+ [];
+per_msg_ttl_header(#'P_basic'{expiration = Expiration}) ->
+ [{<<"original-expiration">>, longstr, Expiration}];
+per_msg_ttl_header(_) ->
+ [].
+
+%% Splits routes into {deliverable, cycles}. Rejection is an explicit
+%% client action, so rejected deaths never count as automatic cycles.
+detect_cycles(rejected, _Msg, Queues) ->
+ {Queues, []};
+
+detect_cycles(_Reason, #basic_message{content = Content}, Queues) ->
+ #content{properties = #'P_basic'{headers = Headers}} =
+ rabbit_binary_parser:ensure_content_decoded(Content),
+ NoCycles = {Queues, []},
+ case Headers of
+ undefined ->
+ NoCycles;
+ _ ->
+ case rabbit_misc:table_lookup(Headers, <<"x-death">>) of
+ {array, Deaths} ->
+ {Cycling, NotCycling} =
+ lists:partition(fun (#resource{name = Queue}) ->
+ is_cycle(Queue, Deaths)
+ end, Queues),
+ OldQueues = [rabbit_misc:table_lookup(D, <<"queue">>) ||
+ {table, D} <- Deaths],
+ OldQueues1 = [QName || {longstr, QName} <- OldQueues],
+ {NotCycling, [[QName | OldQueues1] ||
+ #resource{name = QName} <- Cycling]};
+ _ ->
+ NoCycles
+ end
+ end.
+
+%% True if Queue already appears in the death history AND every hop up to
+%% (and including) that appearance was automatic (no rejection involved).
+is_cycle(Queue, Deaths) ->
+ {Cycle, Rest} =
+ lists:splitwith(
+ fun ({table, D}) ->
+ {longstr, Queue} =/= rabbit_misc:table_lookup(D, <<"queue">>);
+ (_) ->
+ true
+ end, Deaths),
+ %% Is there a cycle, and if so, is it "fully automatic", i.e. with
+ %% no reject in it?
+ case Rest of
+ [] -> false;
+ [H|_] -> lists:all(
+ fun ({table, D}) ->
+ {longstr, <<"rejected">>} =/=
+ rabbit_misc:table_lookup(D, <<"reason">>);
+ (_) ->
+ %% There was something we didn't expect, therefore
+ %% a client must have put it there, therefore the
+ %% cycle was not "fully automatic".
+ false
+ end, Cycle ++ [H])
+ end.
+
+%% Warn about each distinct cycle only once per process (deduplicated via
+%% the process dictionary of the calling queue process).
+log_cycle_once(Queues) ->
+ Key = {queue_cycle, Queues},
+ case get(Key) of
+ true -> ok;
+ undefined -> rabbit_log:warning(
+ "Message dropped. Dead-letter queues cycle detected" ++
+ ": ~p~nThis cycle will NOT be reported again.~n",
+ [Queues]),
+ put(Key, true)
+ end.
diff --git a/deps/rabbit/src/rabbit_definitions.erl b/deps/rabbit/src/rabbit_definitions.erl
new file mode 100644
index 0000000000..0d0212dbae
--- /dev/null
+++ b/deps/rabbit/src/rabbit_definitions.erl
@@ -0,0 +1,767 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Import/export of broker definitions (users, vhosts, permissions,
+%% parameters, policies, queues, exchanges, bindings), on boot and on
+%% demand.
+-module(rabbit_definitions).
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([boot/0]).
+%% automatic import on boot
+-export([maybe_load_definitions/0, maybe_load_definitions/2, maybe_load_definitions_from/2,
+ has_configured_definitions_to_load/0]).
+%% import
+-export([import_raw/1, import_raw/2, import_parsed/1, import_parsed/2,
+ apply_defs/2, apply_defs/3, apply_defs/4, apply_defs/5]).
+
+-export([all_definitions/0]).
+-export([
+ list_users/0, list_vhosts/0, list_permissions/0, list_topic_permissions/0,
+ list_runtime_parameters/0, list_global_runtime_parameters/0, list_policies/0,
+ list_exchanges/0, list_queues/0, list_bindings/0,
+ is_internal_parameter/1
+]).
+-export([decode/1, decode/2, args/1]).
+
+-import(rabbit_misc, [pget/2]).
+
+%%
+%% API
+%%
+
+-type definition_category() :: 'users' |
+ 'vhosts' |
+ 'permissions' |
+ 'topic_permissions' |
+ 'parameters' |
+ 'global_parameters' |
+ 'policies' |
+ 'queues' |
+ 'bindings' |
+ 'exchanges'.
+
+-type definition_object() :: #{binary() => any()}.
+-type definition_list() :: [definition_object()].
+
+-type definitions() :: #{
+ definition_category() => definition_list()
+}.
+
+-export_type([definition_object/0, definition_list/0, definition_category/0, definitions/0]).
+
+-define(IMPORT_WORK_POOL, definition_import_pool).
+
+%% Starts the worker pool used for concurrent definition import; pool
+%% size defaults to the number of detected CPU cores.
+boot() ->
+ PoolSize = application:get_env(rabbit, definition_import_work_pool_size, rabbit_runtime:guess_number_of_cpu_cores()),
+ rabbit_sup:start_supervisor_child(definition_import_pool_sup, worker_pool_sup, [PoolSize, ?IMPORT_WORK_POOL]).
+
+%% Entry point for automatic definition import on node boot.
+maybe_load_definitions() ->
+ %% Note that management.load_definitions is handled in the plugin for backwards compatibility.
+ %% This executes the "core" version of load_definitions.
+ maybe_load_definitions(rabbit, load_definitions).
+
+%% Decodes a raw JSON body and imports it cluster-wide as ?INTERNAL_USER.
+-spec import_raw(Body :: binary() | iolist()) -> ok | {error, term()}.
+import_raw(Body) ->
+ rabbit_log:info("Asked to import definitions. Acting user: ~s", [?INTERNAL_USER]),
+ case decode([], Body) of
+ {error, E} -> {error, E};
+ {ok, _, Map} -> apply_defs(Map, ?INTERNAL_USER)
+ end.
+
+%% Same as import_raw/1 but scoped to a single virtual host.
+-spec import_raw(Body :: binary() | iolist(), VHost :: vhost:name()) -> ok | {error, term()}.
+import_raw(Body, VHost) ->
+ rabbit_log:info("Asked to import definitions. Acting user: ~s", [?INTERNAL_USER]),
+ case decode([], Body) of
+ {error, E} -> {error, E};
+ {ok, _, Map} -> apply_defs(Map, ?INTERNAL_USER, fun() -> ok end, VHost)
+ end.
+
+%% Imports an already-parsed definition map (or proplist); keys are
+%% normalised to atoms first.
+-spec import_parsed(Defs :: #{any() => any()} | list()) -> ok | {error, term()}.
+import_parsed(Body0) when is_list(Body0) ->
+ import_parsed(maps:from_list(Body0));
+import_parsed(Body0) when is_map(Body0) ->
+ rabbit_log:info("Asked to import definitions. Acting user: ~s", [?INTERNAL_USER]),
+ Body = atomise_map_keys(Body0),
+ apply_defs(Body, ?INTERNAL_USER).
+
+%% Same as import_parsed/1 but scoped to a single virtual host.
+%% Spec fix: the union was misplaced inside the map type
+%% (#{any() => any() | list()}); like import_parsed/1, Defs is a map OR
+%% a (prop)list, as the first clause demonstrates.
+-spec import_parsed(Defs :: #{any() => any()} | list(), VHost :: vhost:name()) -> ok | {error, term()}.
+import_parsed(Body0, VHost) when is_list(Body0) ->
+ import_parsed(maps:from_list(Body0), VHost);
+import_parsed(Body0, VHost) ->
+ rabbit_log:info("Asked to import definitions. Acting user: ~s", [?INTERNAL_USER]),
+ Body = atomise_map_keys(Body0),
+ apply_defs(Body, ?INTERNAL_USER, fun() -> ok end, VHost).
+
+-spec all_definitions() -> map().
+%% Collects every exportable definition category into a single map, as
+%% used for definition export. Both rabbit_version and rabbitmq_version
+%% are emitted for compatibility with older/newer importers.
+all_definitions() ->
+ Xs = list_exchanges(),
+ Qs = list_queues(),
+ Bs = list_bindings(),
+
+ Users = list_users(),
+ VHosts = list_vhosts(),
+ Params = list_runtime_parameters(),
+ GParams = list_global_runtime_parameters(),
+ Pols = list_policies(),
+
+ Perms = list_permissions(),
+ TPerms = list_topic_permissions(),
+
+ {ok, Vsn} = application:get_key(rabbit, vsn),
+ #{
+ rabbit_version => rabbit_data_coercion:to_binary(Vsn),
+ rabbitmq_version => rabbit_data_coercion:to_binary(Vsn),
+ users => Users,
+ vhosts => VHosts,
+ permissions => Perms,
+ topic_permissions => TPerms,
+ parameters => Params,
+ global_parameters => GParams,
+ policies => Pols,
+ queues => Qs,
+ bindings => Bs,
+ exchanges => Xs
+ }.
+
+%%
+%% Implementation
+%%
+
+-spec has_configured_definitions_to_load() -> boolean().
+%% True when rabbit.load_definitions is set to a path (not `none').
+has_configured_definitions_to_load() ->
+ case application:get_env(rabbit, load_definitions) of
+ undefined -> false;
+ {ok, none} -> false;
+ {ok, _Path} -> true
+ end.
+
+%% Imports definitions from the file or directory configured under
+%% {App, Key}, if any.
+maybe_load_definitions(App, Key) ->
+ case application:get_env(App, Key) of
+ undefined ->
+ rabbit_log:debug("No definition file configured to import via load_definitions"),
+ ok;
+ {ok, none} ->
+ rabbit_log:debug("No definition file configured to import via load_definitions"),
+ ok;
+ {ok, FileOrDir} ->
+ rabbit_log:debug("Will import definitions file from load_definitions"),
+ IsDir = filelib:is_dir(FileOrDir),
+ maybe_load_definitions_from(IsDir, FileOrDir)
+ end.
+
+%% First argument: whether the path is a directory (import every file in
+%% it) or a single file.
+maybe_load_definitions_from(true, Dir) ->
+ rabbit_log:info("Applying definitions from directory ~s", [Dir]),
+ load_definitions_from_files(file:list_dir(Dir), Dir);
+maybe_load_definitions_from(false, File) ->
+ load_definitions_from_file(File).
+
+%% Imports all files in Dir in lexicographic (sorted) order.
+load_definitions_from_files({ok, Filenames0}, Dir) ->
+ Filenames1 = lists:sort(Filenames0),
+ Filenames2 = [filename:join(Dir, F) || F <- Filenames1],
+ load_definitions_from_filenames(Filenames2);
+load_definitions_from_files({error, E}, Dir) ->
+ rabbit_log:error("Could not read definitions from directory ~s, Error: ~p", [Dir, E]),
+ {error, {could_not_read_defs, E}}.
+
+%% Imports files one by one, stopping at the first failure.
+load_definitions_from_filenames([]) ->
+ ok;
+load_definitions_from_filenames([File|Rest]) ->
+ case load_definitions_from_file(File) of
+ ok -> load_definitions_from_filenames(Rest);
+ {error, E} -> {error, {failed_to_import_definitions, File, E}}
+ end.
+
+%% Reads one definitions file and imports its raw JSON body.
+load_definitions_from_file(File) ->
+ case file:read_file(File) of
+ {ok, Body} ->
+ rabbit_log:info("Applying definitions from file at '~s'", [File]),
+ import_raw(Body);
+ {error, E} ->
+ rabbit_log:error("Could not read definitions from file at '~s', error: ~p", [File, E]),
+ {error, {could_not_read_defs, {File, E}}}
+ end.
+
+%% Decodes Body and looks up the requested Keys in the result.
+%% Returns {ok, Values, DecodedMap} or {error, MissingKeyErrors | not_json}.
+decode(Keys, Body) ->
+ case decode(Body) of
+ {ok, J0} ->
+ %% Consistency: reuse atomise_map_keys/1 instead of an inlined,
+ %% identical maps:fold. decode/1 already normalises top-level
+ %% keys, so this is an idempotent safety net.
+ J = atomise_map_keys(J0),
+ Results = [get_or_missing(K, J) || K <- Keys],
+ case [E || E = {key_missing, _} <- Results] of
+ [] -> {ok, Results, J};
+ Errors -> {error, Errors}
+ end;
+ Else -> Else
+ end.
+
+%% Decodes a JSON body into a map whose top-level keys are atoms.
+%% An empty body decodes to the empty map; any parse failure is
+%% reported as {error, not_json}.
+decode(<<"">>) ->
+ {ok, #{}};
+decode(Body) ->
+ try
+ Decoded = rabbit_json:decode(Body),
+ Normalised = atomise_map_keys(Decoded),
+ {ok, Normalised}
+ catch error:_ -> {error, not_json}
+ end.
+
+%% Converts the top-level keys of a map to atoms (UTF-8 aware).
+atomise_map_keys(Decoded) ->
+ maps:fold(fun(K, V, Acc) ->
+ Acc#{rabbit_data_coercion:to_atom(K, utf8) => V}
+ end, Decoded, Decoded).
+
+-spec apply_defs(Map :: #{atom() => any()}, ActingUser :: rabbit_types:username()) -> 'ok' | {error, term()}.
+
+apply_defs(Map, ActingUser) ->
+ apply_defs(Map, ActingUser, fun () -> ok end).
+
+-spec apply_defs(Map :: #{atom() => any()}, ActingUser :: rabbit_types:username(),
+ SuccessFun :: fun(() -> 'ok')) -> 'ok' | {error, term()};
+ (Map :: #{atom() => any()}, ActingUser :: rabbit_types:username(),
+ VHost :: vhost:name()) -> 'ok' | {error, term()}.
+
+%% Two overloads distinguished by the third argument: a vhost name
+%% (binary) scopes the import; a fun is the cluster-wide success callback.
+apply_defs(Map, ActingUser, VHost) when is_binary(VHost) ->
+ apply_defs(Map, ActingUser, fun () -> ok end, VHost);
+
+%% Cluster-wide import. Categories are applied in dependency order:
+%% users/vhosts/permissions before vhost-scoped objects, and bindings
+%% last. NOTE(review): errors here are returned unformatted, while the
+%% vhost-scoped variants below run them through format/1 — confirm the
+%% asymmetry is intended.
+apply_defs(Map, ActingUser, SuccessFun) when is_function(SuccessFun) ->
+ Version = maps:get(rabbitmq_version, Map, maps:get(rabbit_version, Map, undefined)),
+ try
+ concurrent_for_all(users, ActingUser, Map,
+ fun(User, _Username) ->
+ rabbit_auth_backend_internal:put_user(User, Version, ActingUser)
+ end),
+ concurrent_for_all(vhosts, ActingUser, Map, fun add_vhost/2),
+ validate_limits(Map),
+ concurrent_for_all(permissions, ActingUser, Map, fun add_permission/2),
+ concurrent_for_all(topic_permissions, ActingUser, Map, fun add_topic_permission/2),
+ sequential_for_all(parameters, ActingUser, Map, fun add_parameter/2),
+ sequential_for_all(global_parameters, ActingUser, Map, fun add_global_parameter/2),
+ %% importing policies concurrently can be unsafe as queues will be getting
+ %% potentially out of order notifications of applicable policy changes
+ sequential_for_all(policies, ActingUser, Map, fun add_policy/2),
+ concurrent_for_all(queues, ActingUser, Map, fun add_queue/2),
+ concurrent_for_all(exchanges, ActingUser, Map, fun add_exchange/2),
+ concurrent_for_all(bindings, ActingUser, Map, fun add_binding/2),
+ SuccessFun(),
+ ok
+ catch {error, E} -> {error, E};
+ exit:E -> {error, E}
+ end.
+
+-spec apply_defs(Map :: #{atom() => any()},
+ ActingUser :: rabbit_types:username(),
+ SuccessFun :: fun(() -> 'ok'),
+ VHost :: vhost:name()) -> 'ok' | {error, term()}.
+
+%% Vhost-scoped import: only vhost-local categories are applied (no
+%% users, vhosts or permissions).
+apply_defs(Map, ActingUser, SuccessFun, VHost) when is_binary(VHost) ->
+ rabbit_log:info("Asked to import definitions for a virtual host. Virtual host: ~p, acting user: ~p",
+ [VHost, ActingUser]),
+ try
+ validate_limits(Map, VHost),
+ sequential_for_all(parameters, ActingUser, Map, VHost, fun add_parameter/3),
+ %% importing policies concurrently can be unsafe as queues will be getting
+ %% potentially out of order notifications of applicable policy changes
+ sequential_for_all(policies, ActingUser, Map, VHost, fun add_policy/3),
+ concurrent_for_all(queues, ActingUser, Map, VHost, fun add_queue/3),
+ concurrent_for_all(exchanges, ActingUser, Map, VHost, fun add_exchange/3),
+ concurrent_for_all(bindings, ActingUser, Map, VHost, fun add_binding/3),
+ SuccessFun()
+ catch {error, E} -> {error, format(E)};
+ exit:E -> {error, format(E)}
+ end.
+
+-spec apply_defs(Map :: #{atom() => any()},
+ ActingUser :: rabbit_types:username(),
+ SuccessFun :: fun(() -> 'ok'),
+ ErrorFun :: fun((any()) -> 'ok'),
+ VHost :: vhost:name()) -> 'ok' | {error, term()}.
+
+%% As apply_defs/4 but failures are handed to ErrorFun instead of being
+%% returned. NOTE(review): the body duplicates apply_defs/4 except for
+%% error handling — a candidate for consolidation.
+apply_defs(Map, ActingUser, SuccessFun, ErrorFun, VHost) ->
+ rabbit_log:info("Asked to import definitions for a virtual host. Virtual host: ~p, acting user: ~p",
+ [VHost, ActingUser]),
+ try
+ validate_limits(Map, VHost),
+ sequential_for_all(parameters, ActingUser, Map, VHost, fun add_parameter/3),
+ %% importing policies concurrently can be unsafe as queues will be getting
+ %% potentially out of order notifications of applicable policy changes
+ sequential_for_all(policies, ActingUser, Map, VHost, fun add_policy/3),
+ concurrent_for_all(queues, ActingUser, Map, VHost, fun add_queue/3),
+ concurrent_for_all(exchanges, ActingUser, Map, VHost, fun add_exchange/3),
+ concurrent_for_all(bindings, ActingUser, Map, VHost, fun add_binding/3),
+ SuccessFun()
+ catch {error, E} -> ErrorFun(format(E));
+ exit:E -> ErrorFun(format(E))
+ end.
+
+%% Applies Fun(Map, ActingUser) to every map entry of the named category,
+%% one at a time in list order. Missing categories are a no-op; non-map
+%% entries are silently skipped. Logs the entry count before starting.
+sequential_for_all(Category, ActingUser, Definitions, Fun) ->
+    CategoryKey = rabbit_data_coercion:to_atom(Category),
+    case maps:get(CategoryKey, Definitions, undefined) of
+        undefined ->
+            ok;
+        List ->
+            case List of
+                [] -> ok;
+                _  -> rabbit_log:info("Importing sequentially ~p ~s...",
+                                      [length(List), human_readable_category_name(Category)])
+            end,
+            %% keys are expected to be atoms by the add_* callbacks
+            [Fun(atomize_keys(Entry), ActingUser) || Entry <- List, is_map(Entry)]
+    end.
+
+%% Vhost-scoped sequential import: Fun is invoked as
+%% Fun(VHost, Map, ActingUser) for every map entry of the named category.
+%% Unknown categories are a no-op; non-map entries are skipped.
+sequential_for_all(Name, ActingUser, Definitions, VHost, Fun) ->
+    CategoryKey = rabbit_data_coercion:to_atom(Name),
+    case maps:get(CategoryKey, Definitions, undefined) of
+        undefined -> ok;
+        List      -> [Fun(VHost, atomize_keys(Entry), ActingUser)
+                      || Entry <- List, is_map(Entry)]
+    end.
+
+%% Concurrent counterpart of sequential_for_all/4: every map entry of the
+%% named category is handed to Fun(Map, ActingUser) via the definition
+%% import worker pool. Only safe for categories whose entries are
+%% independent of one another (see the policy caveat in apply_defs).
+concurrent_for_all(Category, ActingUser, Definitions, Fun) ->
+    case maps:get(rabbit_data_coercion:to_atom(Category), Definitions, undefined) of
+        undefined -> ok;
+        List ->
+            case length(List) of
+                0 -> ok;
+                N -> rabbit_log:info("Importing concurrently ~p ~s...", [N, human_readable_category_name(Category)])
+            end,
+            WorkPoolFun = fun(M) ->
+                                  %% keys are expected to be atoms downstream
+                                  Fun(atomize_keys(M), ActingUser)
+                          end,
+            do_concurrent_for_all(List, WorkPoolFun)
+    end.
+
+%% Vhost-scoped variant: Fun is invoked as Fun(VHost, Map, ActingUser).
+%% No progress logging here, mirroring sequential_for_all/5.
+concurrent_for_all(Name, ActingUser, Definitions, VHost, Fun) ->
+    case maps:get(rabbit_data_coercion:to_atom(Name), Definitions, undefined) of
+        undefined -> ok;
+        List ->
+            WorkPoolFun = fun(M) ->
+                                  Fun(VHost, atomize_keys(M), ActingUser)
+                          end,
+            do_concurrent_for_all(List, WorkPoolFun)
+    end.
+
+%% Submits WorkPoolFun(M) for every map M in List to the import worker
+%% pool, then blocks until all submitted jobs have finished. A gatherer
+%% tracks in-flight jobs; workers report failures with gatherer:in/2.
+%% Only the FIRST error received is re-thrown as {error, E} — remaining
+%% jobs still run to completion before gatherer:out/1 returns.
+do_concurrent_for_all(List, WorkPoolFun) ->
+    {ok, Gatherer} = gatherer:start_link(),
+    [begin
+         %% fork/1 registers one in-flight job so gatherer:out/1 below
+         %% will not report 'empty' until every job called finish/1
+         ok = gatherer:fork(Gatherer),
+         worker_pool:submit_async(
+           ?IMPORT_WORK_POOL,
+           fun() ->
+                   try
+                       WorkPoolFun(M)
+                   %% first clause: thrown {error, E}; second: any class
+                   catch {error, E} -> gatherer:in(Gatherer, {error, E});
+                         _:E -> gatherer:in(Gatherer, {error, E})
+                   end,
+                   gatherer:finish(Gatherer)
+           end)
+     end || M <- List, is_map(M)],
+    case gatherer:out(Gatherer) of
+        empty ->
+            ok = gatherer:stop(Gatherer);
+        {value, {error, E}} ->
+            ok = gatherer:stop(Gatherer),
+            throw({error, E})
+    end.
+
+-spec atomize_keys(#{any() => any()}) -> #{atom() => any()}.
+
+%% Returns a copy of M with every key coerced to an atom; values are left
+%% untouched. Should two distinct keys coerce to the same atom, the entry
+%% encountered last wins — the same outcome as the fold-based original.
+atomize_keys(M) ->
+    Pairs = [{rabbit_data_coercion:to_atom(K), V} || {K, V} <- maps:to_list(M)],
+    maps:from_list(Pairs).
+
+-spec human_readable_category_name(definition_category()) -> string().
+
+%% Label used in import progress logging; categories without a special
+%% case are simply stringified.
+human_readable_category_name(topic_permissions) -> "topic permissions";
+human_readable_category_name(parameters) -> "runtime parameters";
+human_readable_category_name(global_parameters) -> "global runtime parameters";
+human_readable_category_name(Other) -> rabbit_data_coercion:to_list(Other).
+
+
+%% Renders an import failure as a human-readable binary suitable for an
+%% API response body.
+format(#amqp_error{name = Name, explanation = Explanation}) ->
+    rabbit_data_coercion:to_binary(rabbit_misc:format("~s: ~s", [Name, Explanation]));
+format({no_such_vhost, undefined}) ->
+    rabbit_data_coercion:to_binary(
+      "Virtual host does not exist and is not specified in definitions file.");
+format({no_such_vhost, VHost}) ->
+    rabbit_data_coercion:to_binary(
+      rabbit_misc:format("Please create virtual host \"~s\" prior to importing definitions.",
+                         [VHost]));
+format({vhost_limit_exceeded, ErrMsg}) ->
+    %% the limit validator already produced a complete message
+    rabbit_data_coercion:to_binary(ErrMsg);
+format(E) ->
+    %% fallback: pretty-print the unrecognised term
+    rabbit_data_coercion:to_binary(rabbit_misc:format("~p", [E])).
+
+%% Imports one vhost-scoped runtime parameter; the target virtual host is
+%% taken from the definition map itself.
+add_parameter(Param, Username) ->
+    VHost = maps:get(vhost, Param, undefined),
+    add_parameter(VHost, Param, Username).
+
+%% Sets runtime parameter Key of component Comp in VHost. Exits with an
+%% HTML-escaped message on validation failure; the import driver turns
+%% that exit into an {error, _} result.
+add_parameter(VHost, Param, Username) ->
+    Comp = maps:get(component, Param, undefined),
+    Key = maps:get(name, Param, undefined),
+    Term = maps:get(value, Param, undefined),
+    Result = case is_map(Term) of
+        true ->
+            %% coerce maps to proplists for backwards compatibility.
+            %% See rabbitmq-management#528.
+            TermProplist = rabbit_data_coercion:to_proplist(Term),
+            rabbit_runtime_parameters:set(VHost, Comp, Key, TermProplist, Username);
+        _ ->
+            rabbit_runtime_parameters:set(VHost, Comp, Key, Term, Username)
+    end,
+    case Result of
+        ok -> ok;
+        {error_string, E} ->
+            %% identify the offending parameter in the error message
+            S = rabbit_misc:format(" (~s/~s/~s)", [VHost, Comp, Key]),
+            exit(rabbit_data_coercion:to_binary(rabbit_misc:escape_html_tags(E ++ S)))
+    end.
+
+%% Imports one global (node-wide) runtime parameter; same map-to-proplist
+%% compatibility conversion as add_parameter/3, but no error_string
+%% post-processing is done here.
+add_global_parameter(Param, Username) ->
+    Key = maps:get(name, Param, undefined),
+    Term = maps:get(value, Param, undefined),
+    case is_map(Term) of
+        true ->
+            %% coerce maps to proplists for backwards compatibility.
+            %% See rabbitmq-management#528.
+            TermProplist = rabbit_data_coercion:to_proplist(Term),
+            rabbit_runtime_parameters:set_global(Key, TermProplist, Username);
+        _ ->
+            rabbit_runtime_parameters:set_global(Key, Term, Username)
+    end.
+
+%% Imports one policy; the target virtual host is taken from the
+%% definition map itself.
+add_policy(Param, Username) ->
+    VHost = maps:get(vhost, Param, undefined),
+    add_policy(VHost, Param, Username).
+
+%% Sets policy Key in VHost. The policy definition (when present) is
+%% converted from a map to a proplist for rabbit_policy:set/7. 'apply-to'
+%% defaults to <<"all">>. Exits with an HTML-escaped message on failure.
+add_policy(VHost, Param, Username) ->
+    Key = maps:get(name, Param, undefined),
+    case rabbit_policy:set(
+           VHost, Key, maps:get(pattern, Param, undefined),
+           case maps:get(definition, Param, undefined) of
+               undefined -> undefined;
+               Def -> rabbit_data_coercion:to_proplist(Def)
+           end,
+           maps:get(priority, Param, undefined),
+           maps:get('apply-to', Param, <<"all">>),
+           Username) of
+        ok -> ok;
+        {error_string, E} -> S = rabbit_misc:format(" (~s/~s)", [VHost, Key]),
+                             exit(rabbit_data_coercion:to_binary(rabbit_misc:escape_html_tags(E ++ S)))
+    end.
+
+-spec add_vhost(map(), rabbit_types:username()) -> ok.
+
+%% Creates or updates a virtual host from its definition map (name,
+%% tracing flag, description/definition, tags).
+add_vhost(VHost, ActingUser) ->
+    VHostName = maps:get(name, VHost, undefined),
+    VHostTrace = maps:get(tracing, VHost, undefined),
+    VHostDefinition = maps:get(definition, VHost, undefined),
+    VHostTags = maps:get(tags, VHost, undefined),
+    rabbit_vhost:put_vhost(VHostName, VHostDefinition, VHostTags, VHostTrace, ActingUser).
+
+%% Grants a user the classic configure/write/read permission triple on a
+%% virtual host.
+add_permission(Permission, ActingUser) ->
+    rabbit_auth_backend_internal:set_permissions(maps:get(user, Permission, undefined),
+                                                 maps:get(vhost, Permission, undefined),
+                                                 maps:get(configure, Permission, undefined),
+                                                 maps:get(write, Permission, undefined),
+                                                 maps:get(read, Permission, undefined),
+                                                 ActingUser).
+
+%% Grants a user topic (routing-key level) permissions on an exchange.
+add_topic_permission(TopicPermission, ActingUser) ->
+    rabbit_auth_backend_internal:set_topic_permissions(
+      maps:get(user, TopicPermission, undefined),
+      maps:get(vhost, TopicPermission, undefined),
+      maps:get(exchange, TopicPermission, undefined),
+      maps:get(write, TopicPermission, undefined),
+      maps:get(read, TopicPermission, undefined),
+      ActingUser).
+
+%% Imports a queue; the virtual host comes from the definition map.
+add_queue(Queue, ActingUser) ->
+    add_queue_int(Queue, r(queue, Queue), ActingUser).
+
+%% Imports a queue into an explicitly given virtual host.
+add_queue(VHost, Queue, ActingUser) ->
+    add_queue_int(Queue, rv(VHost, queue, Queue), ActingUser).
+
+%% Server-generated "amq.*" names are reserved: skip them with a warning
+%% rather than attempting a declaration that would be rejected.
+add_queue_int(_Queue, R = #resource{kind = queue,
+                                    name = <<"amq.", _/binary>>}, ActingUser) ->
+    Name = R#resource.name,
+    rabbit_log:warning("Skipping import of a queue whose name begins with 'amq.', "
+                       "name: ~s, acting user: ~s", [Name, ActingUser]);
+add_queue_int(Queue, Name, ActingUser) ->
+    %% 'none' = no exclusive owner connection for imported queues
+    rabbit_amqqueue:declare(Name,
+                            maps:get(durable, Queue, undefined),
+                            maps:get(auto_delete, Queue, undefined),
+                            args(maps:get(arguments, Queue, undefined)),
+                            none,
+                            ActingUser).
+
+%% Imports an exchange; the virtual host comes from the definition map.
+add_exchange(Exchange, ActingUser) ->
+    add_exchange_int(Exchange, r(exchange, Exchange), ActingUser).
+
+%% Imports an exchange into an explicitly given virtual host.
+add_exchange(VHost, Exchange, ActingUser) ->
+    add_exchange_int(Exchange, rv(VHost, exchange, Exchange), ActingUser).
+
+%% The default (nameless) exchange and reserved "amq.*" exchanges always
+%% exist; skip them with a warning instead of re-declaring.
+add_exchange_int(_Exchange, #resource{kind = exchange, name = <<"">>}, ActingUser) ->
+    rabbit_log:warning("Not importing the default exchange, acting user: ~s", [ActingUser]);
+add_exchange_int(_Exchange, R = #resource{kind = exchange,
+                                          name = <<"amq.", _/binary>>}, ActingUser) ->
+    Name = R#resource.name,
+    rabbit_log:warning("Skipping import of an exchange whose name begins with 'amq.', "
+                       "name: ~s, acting user: ~s", [Name, ActingUser]);
+add_exchange_int(Exchange, Name, ActingUser) ->
+    Internal = case maps:get(internal, Exchange, undefined) of
+                   undefined -> false; %% =< 2.2.0 exports lack the field
+                   I -> I
+               end,
+    rabbit_exchange:declare(Name,
+                            rabbit_exchange:check_type(maps:get(type, Exchange, undefined)),
+                            maps:get(durable, Exchange, undefined),
+                            maps:get(auto_delete, Exchange, undefined),
+                            Internal,
+                            args(maps:get(arguments, Exchange, undefined)),
+                            ActingUser).
+
+%% Imports a binding; source is always an exchange, destination kind is
+%% read from the definition ('queue' or 'exchange'). The vhost comes from
+%% the definition map in the 2-arity variant.
+add_binding(Binding, ActingUser) ->
+    DestType = dest_type(Binding),
+    add_binding_int(Binding, r(exchange, source, Binding),
+                    r(DestType, destination, Binding), ActingUser).
+
+%% Same, but the virtual host is supplied by the caller.
+add_binding(VHost, Binding, ActingUser) ->
+    DestType = dest_type(Binding),
+    add_binding_int(Binding, rv(VHost, exchange, source, Binding),
+                    rv(VHost, DestType, destination, Binding), ActingUser).
+
+add_binding_int(Binding, Source, Destination, ActingUser) ->
+    rabbit_binding:add(
+      #binding{source = Source,
+               destination = Destination,
+               key = maps:get(routing_key, Binding, undefined),
+               args = args(maps:get(arguments, Binding, undefined))},
+      ActingUser).
+
+%% NOTE(review): to_atom on a definition-supplied value creates atoms
+%% from imported data; valid inputs are only "queue"/"exchange", but
+%% arbitrary strings would create new atoms — confirm upstream validation.
+dest_type(Binding) ->
+    rabbit_data_coercion:to_atom(maps:get(destination_type, Binding, undefined)).
+
+%% Builds a #resource{} from a definition map, using the map's own vhost.
+r(Type, Props) -> r(Type, name, Props).
+
+r(Type, Name, Props) ->
+    rabbit_misc:r(maps:get(vhost, Props, undefined), Type, maps:get(Name, Props, undefined)).
+
+%% Builds a #resource{} with an explicitly supplied vhost.
+rv(VHost, Type, Props) -> rv(VHost, Type, name, Props).
+
+rv(VHost, Type, Name, Props) ->
+    rabbit_misc:r(VHost, Type, maps:get(Name, Props, undefined)).
+
+%%--------------------------------------------------------------------
+
+%% Pre-import check (all vhosts): counts the queues in the definitions
+%% that do not already exist, grouped per vhost, and validates each
+%% vhost's queue limit before anything is created.
+validate_limits(All) ->
+    case maps:get(queues, All, undefined) of
+        undefined -> ok;
+        Queues0 ->
+            {ok, VHostMap} = filter_out_existing_queues(Queues0),
+            maps:fold(fun validate_vhost_limit/3, ok, VHostMap)
+    end.
+
+%% Pre-import check for a single vhost: only genuinely new queues count
+%% towards the limit (re-importing existing queues is update-only).
+validate_limits(All, VHost) ->
+    case maps:get(queues, All, undefined) of
+        undefined -> ok;
+        Queues0 ->
+            Queues1 = filter_out_existing_queues(VHost, Queues0),
+            AddCount = length(Queues1),
+            validate_vhost_limit(VHost, AddCount, ok)
+    end.
+
+%% Returns {ok, #{VHost => NewQueueCount}} for the whole definitions set.
+filter_out_existing_queues(Queues) ->
+    build_filtered_map(Queues, maps:new()).
+
+%% Keeps only the queue definitions that do not yet exist in VHost.
+%% Note: binary key <<"name">> — this runs before key atomization.
+filter_out_existing_queues(VHost, Queues) ->
+    Pred = fun(Queue) ->
+               Rec = rv(VHost, queue, <<"name">>, Queue),
+               case rabbit_amqqueue:lookup(Rec) of
+                   {ok, _} -> false;
+                   {error, not_found} -> true
+               end
+           end,
+    lists:filter(Pred, Queues).
+
+%% Extracts a queue definition's resource record and vhost. Binary keys:
+%% this path runs before key atomization.
+build_queue_data(Queue) ->
+    VHost = maps:get(<<"vhost">>, Queue, undefined),
+    Rec = rv(VHost, queue, <<"name">>, Queue),
+    {Rec, VHost}.
+
+%% Folds queue definitions into #{VHost => CountOfNewQueues}, counting
+%% only queues that do not already exist.
+build_filtered_map([], AccMap) ->
+    {ok, AccMap};
+build_filtered_map([Queue|Rest], AccMap0) ->
+    {Rec, VHost} = build_queue_data(Queue),
+    case rabbit_amqqueue:lookup(Rec) of
+        {error, not_found} ->
+            AccMap1 = maps:update_with(VHost, fun(V) -> V + 1 end, 1, AccMap0),
+            build_filtered_map(Rest, AccMap1);
+        {ok, _} ->
+            build_filtered_map(Rest, AccMap0)
+    end.
+
+%% maps:fold-compatible: third argument is the 'ok' accumulator threaded
+%% through validate_limits/1.
+validate_vhost_limit(VHost, AddCount, ok) ->
+    WouldExceed = rabbit_vhost_limit:would_exceed_queue_limit(AddCount, VHost),
+    validate_vhost_queue_limit(VHost, AddCount, WouldExceed).
+
+validate_vhost_queue_limit(_VHost, 0, _) ->
+    % Note: not adding any new queues so the upload
+    % must be update-only
+    ok;
+validate_vhost_queue_limit(_VHost, _AddCount, false) ->
+    % Note: would not exceed queue limit
+    ok;
+validate_vhost_queue_limit(VHost, AddCount, {true, Limit, QueueCount}) ->
+    %% abort the whole import before any resource is created
+    ErrFmt = "Adding ~B queue(s) to virtual host \"~s\" would exceed the limit of ~B queue(s).~n~nThis virtual host currently has ~B queue(s) defined.~n~nImport aborted!",
+    ErrInfo = [AddCount, VHost, Limit, QueueCount],
+    ErrMsg = rabbit_misc:format(ErrFmt, ErrInfo),
+    exit({vhost_limit_exceeded, ErrMsg}).
+
+%% Fetches key K from map L, tagging a miss as {key_missing, K} instead
+%% of crashing or returning a bare default.
+get_or_missing(K, L) ->
+    case maps:get(K, L, undefined) of
+        undefined -> {key_missing, K};
+        V -> V
+    end.
+
+%% Converts definition arguments to an AMQP table. JSON decodes an empty
+%% arguments object as [], so normalize that to an empty map first.
+args([]) -> args(#{});
+args(L) -> rabbit_misc:to_amqp_table(L).
+
+%%
+%% Export
+%%
+
+%% Export: all declarable exchanges as definition maps.
+list_exchanges() ->
+    %% exclude internal exchanges, they are not meant to be declared or used by
+    %% applications; also drop the default exchange and reserved amq.* ones
+    [exchange_definition(X) || X <- lists:filter(fun(#exchange{internal = true}) -> false;
+                                                    (#exchange{name = #resource{name = <<>>}}) -> false;
+                                                    (X) -> not rabbit_exchange:is_amq_prefixed(X)
+                                                 end,
+                                                 rabbit_exchange:list())].
+
+%% One exchange as an export map (binary keys, AMQP table rendered).
+exchange_definition(#exchange{name = #resource{virtual_host = VHost, name = Name},
+                              type = Type,
+                              durable = Durable, auto_delete = AD, arguments = Args}) ->
+    #{<<"vhost">> => VHost,
+      <<"name">> => Name,
+      <<"type">> => Type,
+      <<"durable">> => Durable,
+      <<"auto_delete">> => AD,
+      <<"arguments">> => rabbit_misc:amqp_table(Args)}.
+
+%% Export: all restorable queues as definition maps.
+list_queues() ->
+    %% exclude exclusive queues, they cannot be restored
+    [queue_definition(Q) || Q <- lists:filter(fun(Q0) ->
+                                                  amqqueue:get_exclusive_owner(Q0) =:= none
+                                              end,
+                                              rabbit_amqqueue:list())].
+
+%% One queue as an export map; internal queue-type modules are mapped to
+%% their user-facing short names, unknown types pass through unchanged.
+queue_definition(Q) ->
+    #resource{virtual_host = VHost, name = Name} = amqqueue:get_name(Q),
+    Type = case amqqueue:get_type(Q) of
+               rabbit_classic_queue -> classic;
+               rabbit_quorum_queue -> quorum;
+               rabbit_stream_queue -> stream;
+               T -> T
+           end,
+    #{
+      <<"vhost">> => VHost,
+      <<"name">> => Name,
+      <<"type">> => Type,
+      <<"durable">> => amqqueue:is_durable(Q),
+      <<"auto_delete">> => amqqueue:is_auto_delete(Q),
+      <<"arguments">> => rabbit_misc:amqp_table(amqqueue:get_arguments(Q))
+    }.
+
+%% Export: explicit (user-created) bindings only; implicit default
+%% exchange bindings are not included.
+list_bindings() ->
+    [binding_definition(B) || B <- rabbit_binding:list_explicit()].
+
+%% One binding as an export map; source and destination are flattened to
+%% names plus the shared vhost.
+binding_definition(#binding{source = S,
+                            key = RoutingKey,
+                            destination = D,
+                            args = Args}) ->
+    #{
+      <<"source">> => S#resource.name,
+      <<"vhost">> => S#resource.virtual_host,
+      <<"destination">> => D#resource.name,
+      <<"destination_type">> => D#resource.kind,
+      <<"routing_key">> => RoutingKey,
+      <<"arguments">> => rabbit_misc:amqp_table(Args)
+    }.
+
+%% Export: all virtual hosts with their limits and metadata.
+list_vhosts() ->
+    [vhost_definition(V) || V <- rabbit_vhost:all()].
+
+vhost_definition(VHost) ->
+    #{
+      <<"name">> => vhost:get_name(VHost),
+      <<"limits">> => vhost:get_limits(VHost),
+      <<"metadata">> => vhost:get_metadata(VHost)
+    }.
+
+%% Export: all internal-database users. Password hashes are exported
+%% base64-encoded alongside the hashing algorithm so they can be
+%% re-imported verbatim.
+list_users() ->
+    [user_definition(U) || U <- rabbit_auth_backend_internal:all_users()].
+
+user_definition(User) ->
+    #{<<"name">> => internal_user:get_username(User),
+      <<"password_hash">> => base64:encode(internal_user:get_password_hash(User)),
+      <<"hashing_algorithm">> => rabbit_auth_backend_internal:hashing_module_for_user(User),
+      <<"tags">> => tags_as_binaries(internal_user:get_tags(User)),
+      <<"limits">> => internal_user:get_limits(User)
+    }.
+
+%% Export: vhost-scoped runtime parameters. The is_list/1 guard keeps
+%% only proplist-shaped entries from rabbit_runtime_parameters:list/0.
+list_runtime_parameters() ->
+    [runtime_parameter_definition(P) || P <- rabbit_runtime_parameters:list(), is_list(P)].
+
+runtime_parameter_definition(Param) ->
+    #{
+      <<"vhost">> => pget(vhost, Param),
+      <<"component">> => pget(component, Param),
+      <<"name">> => pget(name, Param),
+      <<"value">> => maps:from_list(pget(value, Param))
+    }.
+
+%% Export: global runtime parameters, minus node-internal ones (see
+%% is_internal_parameter/1).
+list_global_runtime_parameters() ->
+    [global_runtime_parameter_definition(P) || P <- rabbit_runtime_parameters:list_global(), not is_internal_parameter(P)].
+
+%% Re-keys the proplist with binary keys and converts it to a map.
+global_runtime_parameter_definition(P0) ->
+    P = [{rabbit_data_coercion:to_binary(K), V} || {K, V} <- P0],
+    maps:from_list(P).
+
+-define(INTERNAL_GLOBAL_PARAM_PREFIX, "internal").
+
+%% Returns true when a global runtime parameter's name starts with
+%% "internal"; such parameters are node-private and are excluded from
+%% definition export.
+%%
+%% lists:prefix/2 replaces the deprecated string:left/2 (old string API):
+%% the old call padded short names with spaces before comparing, which
+%% can never equal the prefix, so the boolean result is identical.
+is_internal_parameter(Param) ->
+    Name = rabbit_data_coercion:to_list(pget(name, Param)),
+    lists:prefix(?INTERNAL_GLOBAL_PARAM_PREFIX, Name).
+
+%% Export: all policies across all vhosts.
+list_policies() ->
+    [policy_definition(P) || P <- rabbit_policy:list()].
+
+policy_definition(Policy) ->
+    #{
+      <<"vhost">> => pget(vhost, Policy),
+      <<"name">> => pget(name, Policy),
+      <<"pattern">> => pget(pattern, Policy),
+      <<"apply-to">> => pget('apply-to', Policy),
+      <<"priority">> => pget(priority, Policy),
+      <<"definition">> => maps:from_list(pget(definition, Policy))
+    }.
+
+%% Export: classic user permissions; proplists are re-keyed with binary
+%% keys and converted to maps.
+list_permissions() ->
+    [permission_definition(P) || P <- rabbit_auth_backend_internal:list_permissions()].
+
+permission_definition(P0) ->
+    P = [{rabbit_data_coercion:to_binary(K), V} || {K, V} <- P0],
+    maps:from_list(P).
+
+%% Export: topic (routing-key level) permissions, same re-keying.
+list_topic_permissions() ->
+    [topic_permission_definition(P) || P <- rabbit_auth_backend_internal:list_topic_permissions()].
+
+topic_permission_definition(P0) ->
+    P = [{rabbit_data_coercion:to_binary(K), V} || {K, V} <- P0],
+    maps:from_list(P).
+
+%% Renders a list of tag atoms as one comma-separated binary, e.g.
+%% [administrator, monitoring] -> <<"administrator,monitoring">>.
+tags_as_binaries(Tags) ->
+    Names = [atom_to_list(T) || T <- Tags],
+    iolist_to_binary(lists:join(",", Names)).
diff --git a/deps/rabbit/src/rabbit_diagnostics.erl b/deps/rabbit/src/rabbit_diagnostics.erl
new file mode 100644
index 0000000000..999596cdc9
--- /dev/null
+++ b/deps/rabbit/src/rabbit_diagnostics.erl
@@ -0,0 +1,119 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_diagnostics).
+
+-define(PROCESS_INFO,
+ [registered_name, current_stacktrace, initial_call, message_queue_len,
+ links, monitors, monitored_by, heap_size]).
+
+-export([maybe_stuck/0, maybe_stuck/1, top_memory_use/0, top_memory_use/1,
+ top_binary_refs/0, top_binary_refs/1]).
+
+%% Scans all local processes for ones that look stuck, narrowing the
+%% candidate set every 500ms for 5 seconds by default.
+maybe_stuck() -> maybe_stuck(5000).
+
+maybe_stuck(Timeout) ->
+    Pids = processes(),
+    io:format("~s There are ~p processes.~n", [get_time(), length(Pids)]),
+    maybe_stuck(Pids, Timeout).
+
+%% Timeout exhausted: whatever is still in Pids is reported as suspicious.
+maybe_stuck(Pids, Timeout) when Timeout =< 0 ->
+    io:format("~s Found ~p suspicious processes.~n", [get_time(), length(Pids)]),
+    [io:format("~s ~p~n", [get_time(), info(Pid)]) || Pid <- Pids],
+    ok;
+%% Otherwise: keep only processes that still look stuck, wait 500ms and
+%% re-check — transiently waiting processes drop out between rounds.
+maybe_stuck(Pids, Timeout) ->
+    Pids2 = [P || P <- Pids, looks_stuck(P)],
+    io:format("~s Investigated ~p processes this round, ~pms to go.~n",
+              [get_time(), length(Pids2), Timeout]),
+    timer:sleep(500),
+    maybe_stuck(Pids2, Timeout - 500).
+
+%% A process "looks stuck" when it is in 'waiting' status AND its top
+%% stack frame is not one of the known-benign wait points (see
+%% maybe_stuck_stacktrace/1). Dead/unreadable processes return false.
+looks_stuck(Pid) ->
+    case info(Pid, status, gone) of
+        {status, waiting} ->
+            %% It's tempting to just check for message_queue_len > 0
+            %% here rather than mess around with stack traces and
+            %% heuristics. But really, sometimes freshly stuck
+            %% processes can have 0 messages...
+            case info(Pid, current_stacktrace, gone) of
+                {current_stacktrace, [H|_]} ->
+                    maybe_stuck_stacktrace(H);
+                _ ->
+                    false
+            end;
+        _ ->
+            false
+    end.
+
+%% Heuristic on the TOP stack frame of a 'waiting' process. Known benign
+%% wait points (gen_server2/gen_event receive loops, socket accept/recv,
+%% heartbeaters, shell/group/io processes) are whitelisted. Anything else
+%% counts as stuck unless the function name contains "loop", which
+%% suggests a legitimate hand-written receive loop.
+maybe_stuck_stacktrace({gen_server2, process_next_msg, _}) -> false;
+maybe_stuck_stacktrace({gen_event, fetch_msg, _}) -> false;
+maybe_stuck_stacktrace({prim_inet, accept0, _}) -> false;
+maybe_stuck_stacktrace({prim_inet, recv0, _}) -> false;
+maybe_stuck_stacktrace({rabbit_heartbeat, heartbeater, _}) -> false;
+maybe_stuck_stacktrace({rabbit_net, recv, _}) -> false;
+maybe_stuck_stacktrace({group, _, _}) -> false;
+maybe_stuck_stacktrace({shell, _, _}) -> false;
+maybe_stuck_stacktrace({io, _, _}) -> false;
+%% stack frames may carry a fourth (location) element: strip and retry
+maybe_stuck_stacktrace({M, F, A, _}) ->
+    maybe_stuck_stacktrace({M, F, A});
+maybe_stuck_stacktrace({_M, F, _A}) ->
+    %% string:find/2 replaces the deprecated string:str/2; 'nomatch'
+    %% corresponds to the old return value 0 ("loop" absent -> stuck)
+    string:find(atom_to_list(F), "loop") =:= nomatch.
+
+%% Prints the top 30 processes by memory use.
+top_memory_use() -> top_memory_use(30).
+
+%% Prints the Count processes with the largest 'memory' process_info
+%% value, sorted descending (sort on the {memory, N} tuple, reversed).
+top_memory_use(Count) ->
+    Pids = processes(),
+    io:format("~s Memory use: top ~p of ~p processes.~n", [get_time(), Count, length(Pids)]),
+    Procs = [{info(Pid, memory, 0), info(Pid)} || Pid <- Pids],
+    Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count),
+    io:format("~s ~p~n", [get_time(), Sorted]).
+
+%% Prints the top 30 processes by referenced off-heap binary bytes.
+top_binary_refs() -> top_binary_refs(30).
+
+top_binary_refs(Count) ->
+    Pids = processes(),
+    io:format("~s Binary refs: top ~p of ~p processes.~n", [get_time(), Count, length(Pids)]),
+    Procs = [{{binary_refs, binary_refs(Pid)}, info(Pid)} || Pid <- Pids],
+    Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count),
+    io:format("~s ~p~n", [get_time(), Sorted]).
+
+%% Total size of DISTINCT refc binaries referenced by Pid: usort on
+%% {Ptr, Size} pairs deduplicates multiple references to the same binary
+%% before summing. Returns 0 when the info is unavailable.
+binary_refs(Pid) ->
+    case info(Pid, binary, []) of
+        {binary, Refs} ->
+            lists:sum([Sz || {_Ptr, Sz} <- lists:usort([{Ptr, Sz} ||
+                                                           {Ptr, Sz, _Cnt} <- Refs])]);
+        _ -> 0
+    end.
+
+%% Standard diagnostic summary for a process: its pid plus the
+%% ?PROCESS_INFO items.
+info(Pid) ->
+    [{pid, Pid} | info(Pid, ?PROCESS_INFO, [])].
+
+%% process_info/2 wrapper that never raises: returns Default on any
+%% failure (e.g. badarg for a non-local pid). When a single item atom was
+%% requested, the default is wrapped as {Item, Default} to mirror the
+%% success shape.
+info(Pid, Infos, Default) ->
+    try
+        process_info(Pid, Infos)
+    catch
+        _:_ -> case is_atom(Infos) of
+                   true -> {Infos, Default};
+                   false -> Default
+               end
+    end.
+
+%% Current local wall-clock time as chardata "Y-MM-DD HH:MM:SS", used to
+%% prefix every diagnostic line (always consumed via ~s).
+%%
+%% The ~2..0B control sequence zero-pads to two digits, replacing the
+%% hand-rolled prefix_zero/1 helper, which raised function_clause for any
+%% input that was not exactly one or two characters long.
+get_time() ->
+    {{Y, M, D}, {H, Min, Sec}} = calendar:local_time(),
+    io_lib:format("~B-~2..0B-~2..0B ~2..0B:~2..0B:~2..0B",
+                  [Y, M, D, H, Min, Sec]).
diff --git a/deps/rabbit/src/rabbit_direct.erl b/deps/rabbit/src/rabbit_direct.erl
new file mode 100644
index 0000000000..3fc2d75908
--- /dev/null
+++ b/deps/rabbit/src/rabbit_direct.erl
@@ -0,0 +1,235 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_direct).
+
+-export([boot/0, force_event_refresh/1, list/0, connect/5,
+ start_channel/10, disconnect/2]).
+
+-deprecated([{force_event_refresh, 1, eventually}]).
+
+%% Internal
+-export([list_local/0]).
+
+%% For testing only
+-export([extract_extra_auth_props/4]).
+
+-include("rabbit.hrl").
+-include("rabbit_misc.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec boot() -> 'ok'.
+
+%% Starts the supervisor for direct-connection channel processes under
+%% the main rabbit supervision tree.
+boot() -> rabbit_sup:start_supervisor_child(
+            rabbit_direct_client_sup, rabbit_client_sup,
+            [{local, rabbit_direct_client_sup},
+             {rabbit_channel_sup, start_link, []}]).
+
+-spec force_event_refresh(reference()) -> 'ok'.
+
+%% Asks every direct connection (cluster-wide, via list/0) to re-emit its
+%% creation event, tagged with Ref. Deprecated (see -deprecated above).
+force_event_refresh(Ref) ->
+    [Pid ! {force_event_refresh, Ref} || Pid <- list()],
+    ok.
+
+-spec list_local() -> [pid()].
+
+%% Direct connections on this node only (pg_local group membership).
+list_local() ->
+    pg_local:get_members(rabbit_direct).
+
+-spec list() -> [pid()].
+
+%% Direct connections across all running cluster nodes.
+list() ->
+    Nodes = rabbit_nodes:all_running(),
+    rabbit_misc:append_rpc_all_nodes(Nodes, rabbit_direct, list_local, [], ?RPC_TIMEOUT).
+
+%%----------------------------------------------------------------------------
+
+%% Builds the deferred authentication closure for connect/5, keyed on the
+%% credential shape:
+%%   {none, _}          -> trusted dummy user (no authentication)
+%%   {Username, none}   -> username-only check (pre-authenticated caller)
+%%   {Username, Password} -> full password check with vhost context
+auth_fun({none, _}, _VHost, _ExtraAuthProps) ->
+    fun () -> {ok, rabbit_auth_backend_dummy:user()} end;
+
+auth_fun({Username, none}, _VHost, _ExtraAuthProps) ->
+    fun () -> rabbit_access_control:check_user_login(Username, []) end;
+
+auth_fun({Username, Password}, VHost, ExtraAuthProps) ->
+    fun () ->
+        rabbit_access_control:check_user_login(
+          Username,
+          [{password, Password}, {vhost, VHost}] ++ ExtraAuthProps)
+    end.
+
+-spec connect
+        (({'none', 'none'} | {rabbit_types:username(), 'none'} |
+          {rabbit_types:username(), rabbit_types:password()}),
+         rabbit_types:vhost(), rabbit_types:protocol(), pid(),
+         rabbit_event:event_props()) ->
+            rabbit_types:ok_or_error2(
+              {rabbit_types:user(), rabbit_framing:amqp_table()},
+              'broker_not_found_on_node' |
+              {'auth_failure', string()} | 'access_refused').
+
+%% Establishes a direct (in-VM) connection: checks broker state, vhost
+%% connection limit and vhost liveness, then authenticates via the
+%% auth_fun/3 closure and finishes in connect1/5.
+%% NOTE(review): the body can also return {error, broker_is_booting},
+%% {error, not_allowed} and {error, {internal_error, vhost_is_down}},
+%% which the -spec's error union above does not list — confirm intended.
+connect(Creds, VHost, Protocol, Pid, Infos) ->
+    ExtraAuthProps = extract_extra_auth_props(Creds, VHost, Pid, Infos),
+    AuthFun = auth_fun(Creds, VHost, ExtraAuthProps),
+    case rabbit:is_running() of
+        true ->
+            %% the client sup only exists once boot/0 has run
+            case whereis(rabbit_direct_client_sup) of
+                undefined ->
+                    {error, broker_is_booting};
+                _ ->
+                    case is_over_vhost_connection_limit(VHost, Creds, Pid) of
+                        true ->
+                            {error, not_allowed};
+                        false ->
+                            case is_vhost_alive(VHost, Creds, Pid) of
+                                false ->
+                                    {error, {internal_error, vhost_is_down}};
+                                true ->
+                                    case AuthFun() of
+                                        {ok, User = #user{username = Username}} ->
+                                            notify_auth_result(Username,
+                                                               user_authentication_success, []),
+                                            connect1(User, VHost, Protocol, Pid, Infos);
+                                        {refused, Username, Msg, Args} ->
+                                            notify_auth_result(Username,
+                                                               user_authentication_failure,
+                                                               [{error, rabbit_misc:format(Msg, Args)}]),
+                                            {error, {auth_failure, "Refused"}}
+                                    end %% AuthFun()
+                            end %% is_vhost_alive
+                    end %% is_over_vhost_connection_limit
+            end;
+        false -> {error, broker_not_found_on_node}
+    end.
+
+%% Collects protocol-specific extra authentication properties (e.g. for
+%% MQTT/STOMP) by delegating to an optional per-protocol module. Returns
+%% [] when the connection carries no recognizable protocol info.
+extract_extra_auth_props(Creds, VHost, Pid, Infos) ->
+    case extract_protocol(Infos) of
+        undefined ->
+            [];
+        Protocol ->
+            maybe_call_connection_info_module(Protocol, Creds, VHost, Pid, Infos)
+    end.
+
+%% The 'protocol' event property is {Name, Version}; anything else means
+%% no usable protocol information.
+extract_protocol(Infos) ->
+    case proplists:get_value(protocol, Infos, undefined) of
+        {Protocol, _Version} ->
+            Protocol;
+        _ ->
+            undefined
+    end.
+
+%% Derives the module name "rabbit_<protocol>_connection_info" (spaces
+%% replaced by underscores, lowercased) and calls its
+%% additional_authn_params/4 if the module exists; [] otherwise (the
+%% code_server_cache wrapper absorbs missing modules).
+%% NOTE(review): to_atom creates the atom dynamically; Protocol values
+%% here come from connection event props — confirm they are from a fixed
+%% internal set, not attacker-controlled.
+maybe_call_connection_info_module(Protocol, Creds, VHost, Pid, Infos) ->
+    Module = rabbit_data_coercion:to_atom(string:to_lower(
+        "rabbit_" ++
+        lists:flatten(string:replace(rabbit_data_coercion:to_list(Protocol), " ", "_", all)) ++
+        "_connection_info")
+    ),
+    Args = [Creds, VHost, Pid, Infos],
+    code_server_cache:maybe_call_mfa(Module, additional_authn_params, Args, []).
+
+%% True when the vhost's supervision subtree is running; logs a refusal
+%% (with the username, blank for anonymous) and returns false otherwise.
+is_vhost_alive(VHost, {Username, _Password}, Pid) ->
+    PrintedUsername = case Username of
+                          none -> "";
+                          _ -> Username
+                      end,
+    case rabbit_vhost_sup_sup:is_vhost_alive(VHost) of
+        true -> true;
+        false ->
+            rabbit_log_connection:error(
+              "Error on Direct connection ~p~n"
+              "access to vhost '~s' refused for user '~s': "
+              "vhost '~s' is down",
+              [Pid, VHost, PrintedUsername, VHost]),
+            false
+    end.
+
+%% True when opening one more connection would exceed the vhost's
+%% connection limit. A missing vhost (thrown no_such_vhost) is also
+%% treated as over-limit so connect/5 refuses with not_allowed.
+is_over_vhost_connection_limit(VHost, {Username, _Password}, Pid) ->
+    PrintedUsername = case Username of
+                          none -> "";
+                          _ -> Username
+                      end,
+    try rabbit_vhost_limit:is_over_connection_limit(VHost) of
+        false -> false;
+        {true, Limit} ->
+            rabbit_log_connection:error(
+              "Error on Direct connection ~p~n"
+              "access to vhost '~s' refused for user '~s': "
+              "vhost connection limit (~p) is reached",
+              [Pid, VHost, PrintedUsername, Limit]),
+            true
+    catch
+        throw:{error, {no_such_vhost, VHost}} ->
+            rabbit_log_connection:error(
+              "Error on Direct connection ~p~n"
+              "vhost ~s not found", [Pid, VHost]),
+            true
+    end.
+
+%% Emits a user_authentication_success/failure event. Anonymous users get
+%% name '' which, like any other '' value, is filtered out of the props.
+notify_auth_result(Username, AuthResult, ExtraProps) ->
+    EventProps = [{connection_type, direct},
+                  {name, case Username of none -> ''; _ -> Username end}] ++
+                 ExtraProps,
+    rabbit_event:notify(AuthResult, [P || {_, V} = P <- EventProps, V =/= '']).
+
+%% Post-authentication phase: enforces the per-user connection limit,
+%% checks vhost access authorization, then registers the connection
+%% (pg_local group, core metrics, connection_created event) and returns
+%% the user plus the broker's server properties.
+connect1(User = #user{username = Username}, VHost, Protocol, Pid, Infos) ->
+    case rabbit_auth_backend_internal:is_over_connection_limit(Username) of
+        false ->
+            % Note: peer_host can be either a tuple or
+            % a binary if reverse_dns_lookups is enabled
+            PeerHost = proplists:get_value(peer_host, Infos),
+            AuthzContext = proplists:get_value(variable_map, Infos, #{}),
+            try rabbit_access_control:check_vhost_access(User, VHost,
+                                                         {ip, PeerHost}, AuthzContext) of
+                ok -> ok = pg_local:join(rabbit_direct, Pid),
+                      rabbit_core_metrics:connection_created(Pid, Infos),
+                      rabbit_event:notify(connection_created, Infos),
+                      {ok, {User, rabbit_reader:server_properties(Protocol)}}
+            catch
+                %% authz refusal surfaces as an exit with an #amqp_error{}
+                exit:#amqp_error{name = Reason = not_allowed} ->
+                    {error, Reason}
+            end;
+        {true, Limit} ->
+            rabbit_log_connection:error(
+              "Error on Direct connection ~p~n"
+              "access refused for user '~s': "
+              "user connection limit (~p) is reached",
+              [Pid, Username, Limit]),
+            {error, not_allowed}
+    end.
+
+-spec start_channel
+        (rabbit_channel:channel_number(), pid(), pid(), string(),
+         rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(),
+         rabbit_framing:amqp_table(), pid(), any()) ->
+            {'ok', pid()}.
+
+%% Opens a channel on a direct connection: enforces the per-user channel
+%% limit, then starts a rabbit_channel_sup child under the direct client
+%% supervisor and returns the channel process pid.
+%% NOTE(review): the over-limit branch returns {error, not_allowed},
+%% which the -spec above does not cover — confirm intended.
+start_channel(Number, ClientChannelPid, ConnPid, ConnName, Protocol,
+              User = #user{username = Username}, VHost, Capabilities,
+              Collector, AmqpParams) ->
+    case rabbit_auth_backend_internal:is_over_channel_limit(Username) of
+        false ->
+            {ok, _, {ChannelPid, _}} =
+                supervisor2:start_child(
+                  rabbit_direct_client_sup,
+                  [{direct, Number, ClientChannelPid, ConnPid, ConnName, Protocol,
+                    User, VHost, Capabilities, Collector, AmqpParams}]),
+            {ok, ChannelPid};
+        {true, Limit} ->
+            rabbit_log_connection:error(
+              "Error on direct connection ~p~n"
+              "number of channels opened for user '~s' has reached the "
+              "maximum allowed limit of (~w)",
+              [ConnPid, Username, Limit]),
+            {error, not_allowed}
+    end.
+
+-spec disconnect(pid(), rabbit_event:event_props()) -> 'ok'.
+
+%% Tears down a direct connection's registrations: leaves the pg_local
+%% group, clears core metrics and emits the connection_closed event.
+disconnect(Pid, Infos) ->
+    pg_local:leave(rabbit_direct, Pid),
+    rabbit_core_metrics:connection_closed(Pid),
+    rabbit_event:notify(connection_closed, Infos).
diff --git a/deps/rabbit/src/rabbit_disk_monitor.erl b/deps/rabbit/src/rabbit_disk_monitor.erl
new file mode 100644
index 0000000000..8277794098
--- /dev/null
+++ b/deps/rabbit/src/rabbit_disk_monitor.erl
@@ -0,0 +1,317 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_disk_monitor).
+
+%% Disk monitoring server. Monitors free disk space
+%% periodically and sets alarms when it is below a certain
+%% watermark (configurable either as an absolute value or
+%% relative to the memory limit).
+%%
+%% Disk monitoring is done by shelling out to /usr/bin/df
+%% instead of related built-in OTP functions because currently
+%% this is the most reliable way of determining free disk space
+%% for the partition our internal database is on.
+%%
+%% Update interval is dynamically calculated assuming disk
+%% space is being filled at FAST_RATE.
+
+-behaviour(gen_server).
+
+-export([start_link/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-export([get_disk_free_limit/0, set_disk_free_limit/1,
+ get_min_check_interval/0, set_min_check_interval/1,
+ get_max_check_interval/0, set_max_check_interval/1,
+ get_disk_free/0, set_enabled/1]).
+
+-define(SERVER, ?MODULE).
+-define(DEFAULT_MIN_DISK_CHECK_INTERVAL, 100).
+-define(DEFAULT_MAX_DISK_CHECK_INTERVAL, 10000).
+-define(DEFAULT_DISK_FREE_LIMIT, 50000000).
+%% 250MB/s i.e. 250kB/ms
+-define(FAST_RATE, (250 * 1000)).
+
+-record(state, {
+ %% monitor partition on which this directory resides
+ dir,
+ %% configured limit in bytes
+ limit,
+ %% last known free disk space amount in bytes
+ actual,
+ %% minimum check interval
+ min_interval,
+ %% maximum check interval
+ max_interval,
+ %% timer that drives periodic checks
+ timer,
+ %% is free disk space alarm currently in effect?
+ alarmed,
+ %% is monitoring enabled? false on unsupported
+ %% platforms
+ enabled,
+ %% number of retries to enable monitoring if it fails
+ %% on start-up
+ retries,
+ %% Interval between retries
+ interval
+}).
+
+%%----------------------------------------------------------------------------
+
+-type disk_free_limit() :: (integer() | string() | {'mem_relative', float() | integer()}).
+
+%%----------------------------------------------------------------------------
+%% Public API
+%%----------------------------------------------------------------------------
+
+%% Public API. Every accessor is a synchronous, infinite-timeout call
+%% into the monitor process, so reads and configuration changes are
+%% serialised through it.
+-spec get_disk_free_limit() -> integer().
+
+get_disk_free_limit() ->
+    gen_server:call(?MODULE, get_disk_free_limit, infinity).
+
+-spec set_disk_free_limit(disk_free_limit()) -> 'ok'.
+
+set_disk_free_limit(Limit) ->
+    gen_server:call(?MODULE, {set_disk_free_limit, Limit}, infinity).
+
+-spec get_min_check_interval() -> integer().
+
+get_min_check_interval() ->
+    gen_server:call(?MODULE, get_min_check_interval, infinity).
+
+-spec set_min_check_interval(integer()) -> 'ok'.
+
+set_min_check_interval(Interval) ->
+    gen_server:call(?MODULE, {set_min_check_interval, Interval}, infinity).
+
+-spec get_max_check_interval() -> integer().
+
+get_max_check_interval() ->
+    gen_server:call(?MODULE, get_max_check_interval, infinity).
+
+-spec set_max_check_interval(integer()) -> 'ok'.
+
+set_max_check_interval(Interval) ->
+    gen_server:call(?MODULE, {set_max_check_interval, Interval}, infinity).
+
+%% 'unknown' is possible before the first successful check.
+-spec get_disk_free() -> (integer() | 'unknown').
+%% Fix: the handle_call clauses match {set_enabled, true | false}, so the
+%% argument is a boolean, not a string as the spec previously claimed.
+-spec set_enabled(boolean()) -> 'ok'.
+
+get_disk_free() ->
+    gen_server:call(?MODULE, get_disk_free, infinity).
+
+set_enabled(Enabled) ->
+    gen_server:call(?MODULE, {set_enabled, Enabled}, infinity).
+
+%%----------------------------------------------------------------------------
+%% gen_server callbacks
+%%----------------------------------------------------------------------------
+
+-spec start_link(disk_free_limit()) -> rabbit_types:ok_pid_or_error().
+
+start_link(Args) ->
+    gen_server:start_link({local, ?SERVER}, ?MODULE, [Args], []).
+
+%% Builds the initial state from the configured limit plus retry settings
+%% taken from the 'rabbit' application environment, then immediately tries
+%% to enable monitoring. enable/1 may leave enabled = false (and schedule
+%% a retry) if the platform tools cannot be used yet.
+init([Limit]) ->
+    Dir = dir(),
+    {ok, Retries} = application:get_env(rabbit, disk_monitor_failure_retries),
+    {ok, Interval} = application:get_env(rabbit, disk_monitor_failure_retry_interval),
+    State = #state{dir          = Dir,
+                   min_interval = ?DEFAULT_MIN_DISK_CHECK_INTERVAL,
+                   max_interval = ?DEFAULT_MAX_DISK_CHECK_INTERVAL,
+                   alarmed      = false,
+                   enabled      = true,
+                   limit        = Limit,
+                   retries      = Retries,
+                   interval     = Interval},
+    {ok, enable(State)}.
+
+%% Synchronous API dispatch. Limit changes trigger an immediate re-check
+%% via set_disk_limits/2; interval changes only take effect from the next
+%% scheduled tick.
+handle_call(get_disk_free_limit, _From, State = #state{limit = Limit}) ->
+    {reply, Limit, State};
+
+handle_call({set_disk_free_limit, _}, _From, #state{enabled = false} = State) ->
+    rabbit_log:info("Cannot set disk free limit: "
+                    "disabled disk free space monitoring", []),
+    {reply, ok, State};
+
+handle_call({set_disk_free_limit, Limit}, _From, State) ->
+    {reply, ok, set_disk_limits(State, Limit)};
+
+handle_call(get_min_check_interval, _From, State) ->
+    {reply, State#state.min_interval, State};
+
+handle_call(get_max_check_interval, _From, State) ->
+    {reply, State#state.max_interval, State};
+
+handle_call({set_min_check_interval, MinInterval}, _From, State) ->
+    {reply, ok, State#state{min_interval = MinInterval}};
+
+handle_call({set_max_check_interval, MaxInterval}, _From, State) ->
+    {reply, ok, State#state{max_interval = MaxInterval}};
+
+handle_call(get_disk_free, _From, State = #state { actual = Actual }) ->
+    {reply, Actual, State};
+
+handle_call({set_enabled, _Enabled = true}, _From, State) ->
+    %% Fix: keep the state produced by set_disk_limits/start_timer — it
+    %% carries the new timer reference and the freshly computed limit and
+    %% free-space figures. The previous code discarded it, so the timer
+    %% ref was lost and a later set_enabled(false) would cancel a stale
+    %% (or undefined) timer.
+    State1 = start_timer(set_disk_limits(State, State#state.limit)),
+    rabbit_log:info("Free disk space monitor was enabled"),
+    {reply, ok, State1#state{enabled = true}};
+handle_call({set_enabled, _Enabled = false}, _From, State) ->
+    erlang:cancel_timer(State#state.timer),
+    rabbit_log:info("Free disk space monitor was manually disabled"),
+    {reply, ok, State#state{enabled = false}};
+
+%% NOTE(review): unknown calls get {noreply, State}, which leaves the
+%% caller blocked until its call timeout — presumably deliberate
+%% "crash the caller, not the monitor" behaviour; confirm before changing.
+handle_call(_Request, _From, State) ->
+    {noreply, State}.
+
+handle_cast(_Request, State) ->
+    {noreply, State}.
+
+%% try_enable is scheduled by enable/1 after a failed start-up attempt;
+%% each delivery consumes one retry. update is the periodic re-check tick
+%% scheduled by start_timer/1.
+handle_info(try_enable, #state{retries = Retries} = State) ->
+    {noreply, enable(State#state{retries = Retries - 1})};
+handle_info(update, State) ->
+    {noreply, start_timer(internal_update(State))};
+
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+terminate(_Reason, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+%% Server Internals
+%%----------------------------------------------------------------------------
+
+% the partition / drive containing this directory will be monitored
+dir() -> rabbit_mnesia:dir().
+
+%% Normalises the requested limit (absolute, string with units, or
+%% mem_relative) into bytes, stores it, logs it in (decimal) megabytes
+%% and performs an immediate re-check against the new threshold.
+set_disk_limits(State, Limit0) ->
+    Limit = interpret_limit(Limit0),
+    State1 = State#state { limit = Limit },
+    rabbit_log:info("Disk free limit set to ~pMB~n",
+                    [trunc(Limit / 1000000)]),
+    internal_update(State1).
+
+%% One monitoring pass: reads current free space and raises/clears the
+%% disk resource alarm only on threshold *crossings* (alarmed flag flip),
+%% so repeated checks on the same side of the limit are silent.
+internal_update(State = #state { limit   = Limit,
+                                 dir     = Dir,
+                                 alarmed = Alarmed}) ->
+    CurrentFree = get_disk_free(Dir),
+    NewAlarmed = CurrentFree < Limit,
+    case {Alarmed, NewAlarmed} of
+        {false, true} ->
+            emit_update_info("insufficient", CurrentFree, Limit),
+            rabbit_alarm:set_alarm({{resource_limit, disk, node()}, []});
+        {true, false} ->
+            emit_update_info("sufficient", CurrentFree, Limit),
+            rabbit_alarm:clear_alarm({resource_limit, disk, node()});
+        _ ->
+            ok
+    end,
+    State #state {alarmed = NewAlarmed, actual = CurrentFree}.
+
+get_disk_free(Dir) ->
+    get_disk_free(Dir, os:type()).
+
+%% OS-specific free-space probe. Solaris-family df does not accept -P,
+%% hence the separate clause; other unixes get POSIX output via -kP.
+get_disk_free(Dir, {unix, Sun})
+  when Sun =:= sunos; Sun =:= sunos4; Sun =:= solaris ->
+    Df = os:find_executable("df"),
+    parse_free_unix(rabbit_misc:os_cmd(Df ++ " -k " ++ Dir));
+get_disk_free(Dir, {unix, _}) ->
+    Df = os:find_executable("df"),
+    parse_free_unix(rabbit_misc:os_cmd(Df ++ " -kP " ++ Dir));
+get_disk_free(Dir, {win32, _}) ->
+    %% On Windows, the Win32 API enforces a limit of 260 characters
+    %% (MAX_PATH). If we call `dir` with a path longer than that, it
+    %% fails with "File not found". Starting with Windows 10 version
+    %% 1607, this limit was removed, but the administrator has to
+    %% configure that.
+    %%
+    %% NTFS supports paths up to 32767 characters. Therefore, paths
+    %% longer than 260 characters exist but they are "inaccessible" to
+    %% `dir`.
+    %%
+    %% A workaround is to tell the Win32 API to not parse a path and
+    %% just pass it raw to the underlying filesystem. To do this, the
+    %% path must be prepended with "\\?\". That's what we do here.
+    %%
+    %% However, the underlying filesystem may not support forward
+    %% slashes transparently, as the Win32 API does. Therefore, we
+    %% convert all forward slashes to backslashes.
+    %%
+    %% See the following page to learn more about this:
+    %% https://ss64.com/nt/syntax-filenames.html
+    RawDir = "\\\\?\\" ++ string:replace(Dir, "/", "\\", all),
+    parse_free_win32(rabbit_misc:os_cmd("dir /-C /W \"" ++ RawDir ++ "\"")).
+
+%% Extracts the 4th field ("Available") of the second df output line and
+%% converts 1024-byte blocks (-k) to bytes. Any unexpected shape exits
+%% with {unparseable, Output} so the caller (enable/1) can retry.
+parse_free_unix(Str) ->
+    case string:tokens(Str, "\n") of
+        [_, S | _] -> case string:tokens(S, " \t") of
+                          [_, _, _, Free | _] -> list_to_integer(Free) * 1024;
+                          _ -> exit({unparseable, Str})
+                      end;
+        _ -> exit({unparseable, Str})
+    end.
+
+%% The free-byte count is the last number on the last line of `dir`
+%% output; matching on the reversed line finds it without depending on
+%% the locale-specific text around it (/-C suppresses digit grouping).
+parse_free_win32(CommandResult) ->
+    LastLine = lists:last(string:tokens(CommandResult, "\r\n")),
+    {match, [Free]} = re:run(lists:reverse(LastLine), "(\\d+)",
+                             [{capture, all_but_first, list}]),
+    list_to_integer(lists:reverse(Free)).
+
+%% {mem_relative, R} means R * total RAM; anything else is parsed as an
+%% absolute value with optional information units ("50MB" etc). A value
+%% that fails to parse falls back to the compiled-in default rather than
+%% crashing the monitor.
+interpret_limit({mem_relative, Relative})
+    when is_number(Relative) ->
+    round(Relative * vm_memory_monitor:get_total_memory());
+interpret_limit(Absolute) ->
+    case rabbit_resource_monitor_misc:parse_information_unit(Absolute) of
+        {ok, ParsedAbsolute} -> ParsedAbsolute;
+        {error, parse_error} ->
+            rabbit_log:error("Unable to parse disk_free_limit value ~p",
+                             [Absolute]),
+            ?DEFAULT_DISK_FREE_LIMIT
+    end.
+
+emit_update_info(StateStr, CurrentFree, Limit) ->
+    rabbit_log:info(
+      "Free disk space is ~s. Free bytes: ~p. Limit: ~p~n",
+      [StateStr, CurrentFree, Limit]).
+
+%% Schedules the next periodic check; the timer ref is kept so that
+%% disabling the monitor can cancel it.
+start_timer(State) ->
+    State#state{timer = erlang:send_after(interval(State), self(), update)}.
+
+%% While alarmed, poll at the slow (max) rate. Otherwise pick an interval
+%% proportional to the headroom: half the time it would take to consume
+%% (Actual - Limit) bytes at ?FAST_RATE (kB/ms), clamped to
+%% [min_interval, max_interval].
+interval(#state{alarmed      = true,
+                max_interval = MaxInterval}) ->
+    MaxInterval;
+interval(#state{limit        = Limit,
+                actual       = Actual,
+                min_interval = MinInterval,
+                max_interval = MaxInterval}) ->
+    IdealInterval = 2 * (Actual - Limit) / ?FAST_RATE,
+    trunc(erlang:max(MinInterval, erlang:min(MaxInterval, IdealInterval))).
+
+%% Attempts to switch monitoring on. Both the disk probe and total-memory
+%% lookup must yield integers; otherwise monitoring stays disabled and a
+%% retry is scheduled (handle_info(try_enable, ...) decrements retries)
+%% until the retry budget is exhausted.
+enable(#state{retries = 0} = State) ->
+    State;
+enable(#state{dir = Dir, interval = Interval, limit = Limit, retries = Retries}
+       = State) ->
+    %% catch: get_disk_free exits with {unparseable, _} on bad df output.
+    case {catch get_disk_free(Dir),
+          vm_memory_monitor:get_total_memory()} of
+        {N1, N2} when is_integer(N1), is_integer(N2) ->
+            rabbit_log:info("Enabling free disk space monitoring~n", []),
+            start_timer(set_disk_limits(State, Limit));
+        Err ->
+            rabbit_log:info("Free disk space monitor encountered an error "
+                            "(e.g. failed to parse output from OS tools): ~p, retries left: ~b~n",
+                            [Err, Retries]),
+            erlang:send_after(Interval, self(), try_enable),
+            State#state{enabled = false}
+    end.
diff --git a/deps/rabbit/src/rabbit_epmd_monitor.erl b/deps/rabbit/src/rabbit_epmd_monitor.erl
new file mode 100644
index 0000000000..938826dba6
--- /dev/null
+++ b/deps/rabbit/src/rabbit_epmd_monitor.erl
@@ -0,0 +1,104 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_epmd_monitor).
+
+-behaviour(gen_server).
+
+-export([start_link/0]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+-record(state, {timer, mod, me, host, port}).
+
+-define(SERVER, ?MODULE).
+-define(CHECK_FREQUENCY, 60000).
+
+%%----------------------------------------------------------------------------
+%% It's possible for epmd to be killed out from underneath us. If that
+%% happens, then obviously clustering and rabbitmqctl stop
+%% working. This process checks up on epmd and restarts it /
+%% re-registers us with it if it has gone away.
+%%
+%% How could epmd be killed?
+%%
+%% 1) The most popular way for this to happen is when running as a
+%% Windows service. The user starts rabbitmqctl first, and this starts
+%% epmd under the user's account. When they log out epmd is killed.
+%%
+%% 2) Some packagings of (non-RabbitMQ?) Erlang apps might do "killall
+%% epmd" as a shutdown or uninstall step.
+%% ----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+start_link() ->
+    gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
+
+%% Captures this node's name parts, asks the configured epmd module for
+%% our current distribution port (Port may remain undefined if epmd does
+%% not know us yet) and starts the periodic check timer.
+init([]) ->
+    {Me, Host} = rabbit_nodes:parts(node()),
+    Mod = net_kernel:epmd_module(),
+    {ok, Port} = handle_port_please(init, Mod:port_please(Me, Host), Me, undefined),
+    State = #state{mod = Mod, me = Me, host = Host, port = Port},
+    {ok, ensure_timer(State)}.
+
+%% NOTE(review): calls get {noreply, State}, leaving any caller blocked
+%% until its call timeout — this server exposes no call API, so this is
+%% presumably deliberate; confirm before changing.
+handle_call(_Request, _From, State) ->
+    {noreply, State}.
+
+%% 'check' (cast or timer message) triggers a re-registration pass; the
+%% timer field is cleared before ensure_timer so a fresh timer is armed.
+handle_cast(check, State0) ->
+    {ok, State1} = check_epmd(State0),
+    {noreply, ensure_timer(State1#state{timer = undefined})};
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+
+handle_info(check, State0) ->
+    {ok, State1} = check_epmd(State0),
+    {noreply, ensure_timer(State1#state{timer = undefined})};
+
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+terminate(_Reason, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+
+%% Arms the periodic 'check' timer if one is not already running.
+ensure_timer(State) ->
+    rabbit_misc:ensure_timer(State, #state.timer, ?CHECK_FREQUENCY, check).
+
+%% One maintenance pass: query epmd for our port (falling back to the
+%% last known port if epmd is unreachable or has forgotten us), restart
+%% epmd if needed, and (re-)register the node name at that port.
+check_epmd(State = #state{mod  = Mod,
+                          me   = Me,
+                          host = Host,
+                          port = Port0}) ->
+    rabbit_log:debug("Asked to [re-]register this node (~s@~s) with epmd...", [Me, Host]),
+    {ok, Port1} = handle_port_please(check, Mod:port_please(Me, Host), Me, Port0),
+    rabbit_nodes:ensure_epmd(),
+    Mod:register_node(Me, Port1),
+    rabbit_log:debug("[Re-]registered this node (~s@~s) with epmd at port ~p", [Me, Host, Port1]),
+    {ok, State#state{port = Port1}}.
+
+%% Normalises the result of Mod:port_please/2. Only a successful
+%% {port, _, _} reply updates the remembered port; 'noport', 'closed'
+%% and errors keep the previous value so a later pass can re-register
+%% at the port we last knew.
+handle_port_please(init, noport, Me, Port) ->
+    rabbit_log:info("epmd does not know us, re-registering as ~s~n", [Me]),
+    {ok, Port};
+handle_port_please(check, noport, Me, Port) ->
+    rabbit_log:warning("epmd does not know us, re-registering ~s at port ~b~n", [Me, Port]),
+    {ok, Port};
+handle_port_please(_, closed, _Me, Port) ->
+    rabbit_log:error("epmd monitor failed to retrieve our port from epmd: closed"),
+    {ok, Port};
+handle_port_please(init, {port, NewPort, _Version}, _Me, _Port) ->
+    rabbit_log:info("epmd monitor knows us, inter-node communication (distribution) port: ~p", [NewPort]),
+    {ok, NewPort};
+handle_port_please(check, {port, NewPort, _Version}, _Me, _Port) ->
+    {ok, NewPort};
+handle_port_please(_, {error, Error}, _Me, Port) ->
+    rabbit_log:error("epmd monitor failed to retrieve our port from epmd: ~p", [Error]),
+    {ok, Port}.
diff --git a/deps/rabbit/src/rabbit_event_consumer.erl b/deps/rabbit/src/rabbit_event_consumer.erl
new file mode 100644
index 0000000000..489d39312e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_event_consumer.erl
@@ -0,0 +1,197 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_event_consumer).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([register/4]).
+-export([init/1, handle_call/2, handle_event/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-record(state, {pid, ref, monitor, pattern}).
+
+%%----------------------------------------------------------------------------
+
+%% Installs this module as a rabbit_event handler that forwards events
+%% matching Pattern to Pid, tagged with Ref, for Duration seconds (or
+%% 'infinity'). Returns {ok, Ref} so the caller can correlate messages.
+register(Pid, Ref, Duration, Pattern) ->
+    case gen_event:add_handler(rabbit_event, ?MODULE, [Pid, Ref, Duration, Pattern]) of
+        ok ->
+            {ok, Ref};
+        Error ->
+            Error
+    end.
+
+%%----------------------------------------------------------------------------
+
+%% Monitors the subscriber so the handler removes itself if the
+%% subscriber dies, and (for finite durations) schedules the timeout
+%% message that ends the subscription. Duration is in seconds.
+init([Pid, Ref, Duration, Pattern]) ->
+    MRef = erlang:monitor(process, Pid),
+    case Duration of
+        infinity -> infinity;
+        _ -> erlang:send_after(Duration * 1000, self(), rabbit_event_consumer_timeout)
+    end,
+    {ok, #state{pid = Pid, ref = Ref, monitor = MRef, pattern = Pattern}}.
+
+handle_call(_Request, State) -> {ok, not_understood, State}.
+
+%% Forwards events whose dotted key matches the subscriber's pattern.
+%% Only events with reference = none (i.e. broadcast, not targeted at a
+%% specific handler) are considered; stats-type events map to 'ignore'.
+handle_event(#event{type      = Type,
+                    props     = Props,
+                    timestamp = TS,
+                    reference = none}, #state{pid     = Pid,
+                                              ref     = Ref,
+                                              pattern = Pattern} = State) ->
+    case key(Type) of
+        ignore -> ok;
+        Key    -> case re:run(Key, Pattern, [{capture, none}]) of
+                      match ->
+                          Data = [{'event', Key}] ++
+                              fmt_proplist([{'timestamp_in_ms', TS} | Props]),
+                          %% NOTE(review): 'confinue' looks like a typo for
+                          %% 'continue', but the receiving side must match
+                          %% this exact atom — confirm against the consumer
+                          %% before renaming.
+                          Pid ! {Ref, Data, confinue};
+                      _ ->
+                          ok
+                  end
+    end,
+    {ok, State};
+handle_event(_Event, State) ->
+    {ok, State}.
+
+%% Subscriber died: detach this handler.
+handle_info({'DOWN', MRef, _, _, _}, #state{monitor = MRef}) ->
+    remove_handler;
+%% Duration elapsed: tell the subscriber we are finished, then detach.
+handle_info(rabbit_event_consumer_timeout, #state{pid = Pid, ref = Ref}) ->
+    Pid ! {Ref, <<>>, finished},
+    remove_handler;
+handle_info(_Info, State) ->
+    {ok, State}.
+
+terminate(_Arg, #state{monitor = MRef}) ->
+    erlang:demonitor(MRef),
+    ok.
+
+code_change(_OldVsn, State, _Extra) -> {ok, State}.
+
+%%----------------------------------------------------------------------------
+
+%% pattern matching is way more efficient than the string operations,
+%% let's use all the keys we're aware of to speed up the handler.
+%% Any unknown or new one will be processed as before (see last function clause).
+%% Maps an event type atom to its dotted binary routing key, or 'ignore'
+%% for events that must not be forwarded (periodic *_stats emissions).
+key(queue_deleted) ->
+    <<"queue.deleted">>;
+key(queue_created) ->
+    <<"queue.created">>;
+key(exchange_created) ->
+    <<"exchange.created">>;
+key(exchange_deleted) ->
+    <<"exchange.deleted">>;
+key(binding_created) ->
+    <<"binding.created">>;
+key(connection_created) ->
+    <<"connection.created">>;
+key(connection_closed) ->
+    <<"connection.closed">>;
+key(channel_created) ->
+    <<"channel.created">>;
+key(channel_closed) ->
+    <<"channel.closed">>;
+key(consumer_created) ->
+    <<"consumer.created">>;
+key(consumer_deleted) ->
+    <<"consumer.deleted">>;
+key(queue_stats) ->
+    ignore;
+key(connection_stats) ->
+    ignore;
+key(policy_set) ->
+    <<"policy.set">>;
+key(policy_cleared) ->
+    <<"policy.cleared">>;
+key(parameter_set) ->
+    <<"parameter.set">>;
+key(parameter_cleared) ->
+    <<"parameter.cleared">>;
+key(vhost_created) ->
+    <<"vhost.created">>;
+key(vhost_deleted) ->
+    <<"vhost.deleted">>;
+key(vhost_limits_set) ->
+    <<"vhost.limits.set">>;
+key(vhost_limits_cleared) ->
+    <<"vhost.limits.cleared">>;
+key(user_authentication_success) ->
+    <<"user.authentication.success">>;
+key(user_authentication_failure) ->
+    <<"user.authentication.failure">>;
+key(user_created) ->
+    <<"user.created">>;
+key(user_deleted) ->
+    <<"user.deleted">>;
+key(user_password_changed) ->
+    <<"user.password.changed">>;
+key(user_password_cleared) ->
+    <<"user.password.cleared">>;
+key(user_tags_set) ->
+    <<"user.tags.set">>;
+key(permission_created) ->
+    <<"permission.created">>;
+key(permission_deleted) ->
+    <<"permission.deleted">>;
+key(topic_permission_created) ->
+    <<"topic.permission.created">>;
+key(topic_permission_deleted) ->
+    <<"topic.permission.deleted">>;
+key(alarm_set) ->
+    <<"alarm.set">>;
+key(alarm_cleared) ->
+    <<"alarm.cleared">>;
+key(shovel_worker_status) ->
+    <<"shovel.worker.status">>;
+key(shovel_worker_removed) ->
+    <<"shovel.worker.removed">>;
+key(federation_link_status) ->
+    <<"federation.link.status">>;
+key(federation_link_removed) ->
+    <<"federation.link.removed">>;
+%% Fallback for unknown event types: drop any "*_stats" event, otherwise
+%% turn underscores into dots (e.g. some_new_event -> some.new.event).
+key(S) ->
+    case string:tokens(atom_to_list(S), "_") of
+        [_, "stats"] -> ignore;
+        Tokens       -> list_to_binary(string:join(Tokens, "."))
+    end.
+
+%% Formats a proplist of event properties into wire-friendly terms.
+%% fmt/2 may expand one key into several pairs (e.g. a #resource{} yields
+%% both the name and a 'vhost' entry), hence the list-vs-tuple fold.
+fmt_proplist(Props) ->
+    lists:foldl(fun({K, V}, Acc) ->
+                        case fmt(K, V) of
+                            L when is_list(L) -> lists:append(L, Acc);
+                            T -> [T | Acc]
+                        end
+                end, [], Props).
+
+%% Converts a single value into binaries/numbers/booleans; unknown terms
+%% are pretty-printed with a huge width so ~p never inserts line breaks.
+fmt(K, #resource{virtual_host = VHost,
+                 name         = Name}) -> [{K, Name},
+                                           {'vhost', VHost}];
+fmt(K, true)                 -> {K, true};
+fmt(K, false)                -> {K, false};
+fmt(K, V) when is_atom(V)    -> {K, atom_to_binary(V, utf8)};
+fmt(K, V) when is_integer(V) -> {K, V};
+fmt(K, V) when is_number(V)  -> {K, V};
+fmt(K, V) when is_binary(V)  -> {K, V};
+fmt(K, [{_, _}|_] = Vs)      -> {K, fmt_proplist(Vs)};
+fmt(K, Vs) when is_list(Vs)  -> {K, [fmt(V) || V <- Vs]};
+fmt(K, V) when is_pid(V)     -> {K, list_to_binary(rabbit_misc:pid_to_string(V))};
+fmt(K, V)                    -> {K,
+                                 list_to_binary(
+                                   rabbit_misc:format("~1000000000p", [V]))}.
+
+%% Exactly the same as fmt/2, duplicated only for performance issues
+fmt(true)                 -> true;
+fmt(false)                -> false;
+fmt(V) when is_atom(V)    -> atom_to_binary(V, utf8);
+fmt(V) when is_integer(V) -> V;
+fmt(V) when is_number(V)  -> V;
+fmt(V) when is_binary(V)  -> V;
+fmt([{_, _}|_] = Vs)      -> fmt_proplist(Vs);
+fmt(Vs) when is_list(Vs)  -> [fmt(V) || V <- Vs];
+fmt(V) when is_pid(V)     -> list_to_binary(rabbit_misc:pid_to_string(V));
+fmt(V)                    -> list_to_binary(
+                               rabbit_misc:format("~1000000000p", [V])).
diff --git a/deps/rabbit/src/rabbit_exchange.erl b/deps/rabbit/src/rabbit_exchange.erl
new file mode 100644
index 0000000000..129b2b868b
--- /dev/null
+++ b/deps/rabbit/src/rabbit_exchange.erl
@@ -0,0 +1,592 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_exchange).
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+
+-export([recover/1, policy_changed/2, callback/4, declare/7,
+ assert_equivalence/6, assert_args_equivalence/2, check_type/1,
+ lookup/1, lookup_many/1, lookup_or_die/1, list/0, list/1, lookup_scratch/2,
+ update_scratch/3, update_decorators/1, immutable/1,
+ info_keys/0, info/1, info/2, info_all/1, info_all/2, info_all/4,
+ route/2, delete/3, validate_binding/2, count/0]).
+-export([list_names/0, is_amq_prefixed/1]).
+%% these must be run inside a mnesia tx
+-export([maybe_auto_delete/2, serial/1, peek_serial/1, update/2]).
+
+%%----------------------------------------------------------------------------
+
+-export_type([name/0, type/0]).
+
+-type name() :: rabbit_types:r('exchange').
+-type type() :: atom().
+-type fun_name() :: atom().
+
+%%----------------------------------------------------------------------------
+
+-define(INFO_KEYS, [name, type, durable, auto_delete, internal, arguments,
+ policy, user_who_performed_action]).
+
+-spec recover(rabbit_types:vhost()) -> [name()].
+
+%% Re-creates, at boot, the RAM copies of this vhost's durable exchanges
+%% that are not already present in rabbit_exchange, firing each type's /
+%% decorators' 'create' callback. Returns the recovered exchange names.
+recover(VHost) ->
+    Xs = rabbit_misc:table_filter(
+           fun (#exchange{name = XName}) ->
+                   XName#resource.virtual_host =:= VHost andalso
+                   mnesia:read({rabbit_exchange, XName}) =:= []
+           end,
+           fun (X, Tx) ->
+                   X1 = case Tx of
+                            true  -> store_ram(X);
+                            false -> rabbit_exchange_decorator:set(X)
+                        end,
+                   callback(X1, create, map_create_tx(Tx), [X1])
+           end,
+           rabbit_durable_exchange),
+    [XName || #exchange{name = XName} <- Xs].
+
+-spec callback
+        (rabbit_types:exchange(), fun_name(),
+         fun((boolean()) -> non_neg_integer()) | atom(), [any()]) -> 'ok'.
+
+%% Invokes Fun on every decorator and on the exchange type module,
+%% passing each one a serial derived from whether that module serialises
+%% events. Serial0 may already be a fun, or an atom wrapped into a
+%% constant fun.
+callback(X = #exchange{type       = XType,
+                       decorators = Decorators}, Fun, Serial0, Args) ->
+    Serial = if is_function(Serial0) -> Serial0;
+                is_atom(Serial0)     -> fun (_Bool) -> Serial0 end
+             end,
+    [ok = apply(M, Fun, [Serial(M:serialise_events(X)) | Args]) ||
+        M <- rabbit_exchange_decorator:select(all, Decorators)],
+    Module = type_to_module(XType),
+    apply(Module, Fun, [Serial(Module:serialise_events()) | Args]).
+
+-spec policy_changed
+        (rabbit_types:exchange(), rabbit_types:exchange()) -> 'ok'.
+
+%% Notifies the type module plus the union of old and new decorators
+%% that the policy changed from X to X1.
+policy_changed(X  = #exchange{type       = XType,
+                              decorators = Decorators},
+               X1 = #exchange{decorators = Decorators1}) ->
+    D  = rabbit_exchange_decorator:select(all, Decorators),
+    D1 = rabbit_exchange_decorator:select(all, Decorators1),
+    DAll = lists:usort(D ++ D1),
+    [ok = M:policy_changed(X, X1) || M <- [type_to_module(XType) | DAll]],
+    ok.
+
+%% True when the type module or any decorator wants serialised events.
+serialise_events(X = #exchange{type = Type, decorators = Decorators}) ->
+    lists:any(fun (M) -> M:serialise_events(X) end,
+              rabbit_exchange_decorator:select(all, Decorators))
+        orelse (type_to_module(Type)):serialise_events().
+
+-spec serial(rabbit_types:exchange()) ->
+          fun((boolean()) -> 'none' | pos_integer()).
+
+%% Returns a fun yielding the next binding serial when serialisation is
+%% required and 'none' otherwise. Must run inside a mnesia tx (it may
+%% bump the serial counter).
+serial(#exchange{name = XName} = X) ->
+    Serial = case serialise_events(X) of
+                 true  -> next_serial(XName);
+                 false -> none
+             end,
+    fun (true)  -> Serial;
+        (false) -> none
+    end.
+
+-spec is_amq_prefixed(rabbit_types:exchange() | binary()) -> boolean().
+
+is_amq_prefixed(Name) when is_binary(Name) ->
+    %% NOTE(review): in an Erlang literal "\." is just ".", so this regex
+    %% is ^amq. with an unescaped (any-character) dot — <<"amqX">> would
+    %% match too. Confirm whether "^amq\\." was intended before changing.
+    case re:run(Name, <<"^amq\.">>) of
+        nomatch    -> false;
+        {match, _} -> true
+    end;
+is_amq_prefixed(#exchange{name = #resource{name = <<>>}}) ->
+    false;
+is_amq_prefixed(#exchange{name = #resource{name = Name}}) ->
+    is_amq_prefixed(Name).
+
+-spec declare
+        (name(), type(), boolean(), boolean(), boolean(),
+         rabbit_framing:amqp_table(), rabbit_types:username())
+        -> rabbit_types:exchange().
+
+%% Declares an exchange: validates it with its type module, then inserts
+%% it transactionally unless one with the same name already exists (the
+%% existing one is returned unchanged in that case). A 'create' callback
+%% and an exchange_created event fire only for genuinely new exchanges.
+declare(XName, Type, Durable, AutoDelete, Internal, Args, Username) ->
+    X = rabbit_exchange_decorator:set(
+          rabbit_policy:set(#exchange{name        = XName,
+                                      type        = Type,
+                                      durable     = Durable,
+                                      auto_delete = AutoDelete,
+                                      internal    = Internal,
+                                      arguments   = Args,
+                                      options     = #{user => Username}})),
+    XT = type_to_module(Type),
+    %% We want to upset things if it isn't ok
+    ok = XT:validate(X),
+    %% Avoid a channel exception if there's a race condition
+    %% with an exchange.delete operation.
+    %%
+    %% See rabbitmq/rabbitmq-federation#7.
+    case rabbit_runtime_parameters:lookup(XName#resource.virtual_host,
+                                          ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT,
+                                          XName#resource.name) of
+        not_found ->
+            rabbit_misc:execute_mnesia_transaction(
+              fun () ->
+                      case mnesia:wread({rabbit_exchange, XName}) of
+                          [] ->
+                              {new, store(X)};
+                          [ExistingX] ->
+                              {existing, ExistingX}
+                      end
+              end,
+              fun ({new, Exchange}, Tx) ->
+                      ok = callback(X, create, map_create_tx(Tx), [Exchange]),
+                      rabbit_event:notify_if(not Tx, exchange_created, info(Exchange)),
+                      Exchange;
+                  ({existing, Exchange}, _Tx) ->
+                      Exchange;
+                  (Err, _Tx) ->
+                      Err
+              end);
+        _ ->
+            %% NOTE(review): this log literal spans two source lines, so the
+            %% emitted message contains an embedded newline plus indentation
+            %% and a trailing "~n." — presumably unintentional formatting;
+            %% confirm before touching the string.
+            rabbit_log:warning("ignoring exchange.declare for exchange ~p,
+                               exchange.delete in progress~n.", [XName]),
+            X
+    end.
+
+map_create_tx(true)  -> transaction;
+map_create_tx(false) -> none.
+
+
+%% Durable exchanges are written (decorator-free) to the durable table
+%% as well as to the RAM table; transient ones only to the RAM table.
+store(X = #exchange{durable = true}) ->
+    mnesia:write(rabbit_durable_exchange, X#exchange{decorators = undefined},
+                 write),
+    store_ram(X);
+store(X = #exchange{durable = false}) ->
+    store_ram(X).
+
+store_ram(X) ->
+    X1 = rabbit_exchange_decorator:set(X),
+    ok = mnesia:write(rabbit_exchange, rabbit_exchange_decorator:set(X1),
+                      write),
+    X1.
+
+%% Used with binaries sent over the wire; the type may not exist.
+
+-spec check_type
+        (binary()) -> atom() | rabbit_types:connection_exit().
+
+%% Resolves a wire-supplied type name to its atom, raising a
+%% command_invalid protocol error for unknown or unregistered types.
+check_type(TypeBin) ->
+    case rabbit_registry:binary_to_type(rabbit_data_coercion:to_binary(TypeBin)) of
+        {error, not_found} ->
+            rabbit_misc:protocol_error(
+              command_invalid, "unknown exchange type '~s'", [TypeBin]);
+        T ->
+            case rabbit_registry:lookup_module(exchange, T) of
+                {error, not_found} -> rabbit_misc:protocol_error(
+                                        command_invalid,
+                                        "invalid exchange type '~s'", [T]);
+                {ok, _Module}      -> T
+            end
+    end.
+
+-spec assert_equivalence
+        (rabbit_types:exchange(), atom(), boolean(), boolean(), boolean(),
+         rabbit_framing:amqp_table())
+        -> 'ok' | rabbit_types:connection_exit().
+
+%% Checks a redeclaration against the existing exchange: type, durable,
+%% auto_delete and internal must match exactly; argument equivalence is
+%% delegated to the type module.
+assert_equivalence(X = #exchange{ name        = XName,
+                                  durable     = Durable,
+                                  auto_delete = AutoDelete,
+                                  internal    = Internal,
+                                  type        = Type},
+                   ReqType, ReqDurable, ReqAutoDelete, ReqInternal, ReqArgs) ->
+    AFE = fun rabbit_misc:assert_field_equivalence/4,
+    AFE(Type,       ReqType,       XName, type),
+    AFE(Durable,    ReqDurable,    XName, durable),
+    AFE(AutoDelete, ReqAutoDelete, XName, auto_delete),
+    AFE(Internal,   ReqInternal,   XName, internal),
+    (type_to_module(Type)):assert_args_equivalence(X, ReqArgs).
+
+-spec assert_args_equivalence
+        (rabbit_types:exchange(), rabbit_framing:amqp_table())
+        -> 'ok' | rabbit_types:connection_exit().
+
+assert_args_equivalence(#exchange{ name = Name, arguments = Args },
+                        RequiredArgs) ->
+    %% The spec says "Arguments are compared for semantic
+    %% equivalence".  The only arg we care about is
+    %% "alternate-exchange".
+    rabbit_misc:assert_args_equivalence(Args, RequiredArgs, Name,
+                                        [<<"alternate-exchange">>]).
+
+-spec lookup
+        (name()) -> rabbit_types:ok(rabbit_types:exchange()) |
+                    rabbit_types:error('not_found').
+
+lookup(Name) ->
+    rabbit_misc:dirty_read({rabbit_exchange, Name}).
+
+
+-spec lookup_many([name()]) -> [rabbit_types:exchange()].
+
+%% Batched lookup; misses are silently omitted from the result.
+lookup_many([])     -> [];
+lookup_many([Name]) -> ets:lookup(rabbit_exchange, Name);
+lookup_many(Names) when is_list(Names) ->
+    %% Normally we'd call mnesia:dirty_read/1 here, but that is quite
+    %% expensive for reasons explained in rabbit_misc:dirty_read/1.
+    lists:append([ets:lookup(rabbit_exchange, Name) || Name <- Names]).
+
+
+-spec lookup_or_die
+        (name()) -> rabbit_types:exchange() |
+                    rabbit_types:channel_exit().
+
+%% As lookup/1 but raises a channel not_found error instead of returning
+%% an error tuple.
+lookup_or_die(Name) ->
+    case lookup(Name) of
+        {ok, X}            -> X;
+        {error, not_found} -> rabbit_amqqueue:not_found(Name)
+    end.
+
+-spec list() -> [rabbit_types:exchange()].
+
+list() -> mnesia:dirty_match_object(rabbit_exchange, #exchange{_ = '_'}).
+
+-spec count() -> non_neg_integer().
+
+count() ->
+    mnesia:table_info(rabbit_exchange, size).
+
+-spec list_names() -> [rabbit_exchange:name()].
+
+list_names() -> mnesia:dirty_all_keys(rabbit_exchange).
+
+%% Not dirty_match_object since that would not be transactional when used in a
+%% tx context
+
+-spec list(rabbit_types:vhost()) -> [rabbit_types:exchange()].
+
+list(VHostPath) ->
+    mnesia:async_dirty(
+      fun () ->
+              mnesia:match_object(
+                rabbit_exchange,
+                #exchange{name = rabbit_misc:r(VHostPath, exchange), _ = '_'},
+                read)
+      end).
+
+-spec lookup_scratch(name(), atom()) ->
+          rabbit_types:ok(term()) |
+          rabbit_types:error('not_found').
+
+%% Fetches the per-application scratch value stored on the exchange;
+%% not_found covers a missing exchange, no scratches at all, or no entry
+%% for this App.
+lookup_scratch(Name, App) ->
+    case lookup(Name) of
+        {ok, #exchange{scratches = undefined}} ->
+            {error, not_found};
+        {ok, #exchange{scratches = Scratches}} ->
+            case orddict:find(App, Scratches) of
+                {ok, Value} -> {ok, Value};
+                error       -> {error, not_found}
+            end;
+        {error, not_found} ->
+            {error, not_found}
+    end.
+
+-spec update_scratch(name(), atom(), fun((any()) -> any())) -> 'ok'.
+
+%% Transactionally applies Fun to App's scratch value ('undefined' if
+%% absent) and stores the result back on the exchange.
+update_scratch(Name, App, Fun) ->
+    rabbit_misc:execute_mnesia_transaction(
+      fun() ->
+              update(Name,
+                     fun(X = #exchange{scratches = Scratches0}) ->
+                             Scratches1 = case Scratches0 of
+                                              undefined -> orddict:new();
+                                              _         -> Scratches0
+                                          end,
+                             Scratch = case orddict:find(App, Scratches1) of
+                                           {ok, S} -> S;
+                                           error   -> undefined
+                                       end,
+                             Scratches2 = orddict:store(
+                                            App, Fun(Scratch), Scratches1),
+                             X#exchange{scratches = Scratches2}
+                     end),
+              ok
+      end).
+
+-spec update_decorators(name()) -> 'ok'.
+
+%% Recomputes the decorator set on the RAM copy of the exchange (no-op
+%% if the exchange is gone).
+update_decorators(Name) ->
+    rabbit_misc:execute_mnesia_transaction(
+      fun() ->
+              case mnesia:wread({rabbit_exchange, Name}) of
+                  [X] -> store_ram(X),
+                         ok;
+                  []  -> ok
+              end
+      end).
+
+-spec update
+        (name(),
+         fun((rabbit_types:exchange()) -> rabbit_types:exchange()))
+        -> not_found | rabbit_types:exchange().
+
+%% Must be called inside a mnesia tx: applies Fun to the current record
+%% and stores the result (durable table included when applicable).
+update(Name, Fun) ->
+    case mnesia:wread({rabbit_exchange, Name}) of
+        [X] -> X1 = Fun(X),
+               store(X1);
+        []  -> not_found
+    end.
+
+-spec immutable(rabbit_types:exchange()) -> rabbit_types:exchange().
+
+%% Strips the mutable/policy-derived fields, leaving only the parts of
+%% an exchange that never change after declaration.
+immutable(X) -> X#exchange{scratches  = none,
+                           policy     = none,
+                           decorators = none}.
+
+-spec info_keys() -> rabbit_types:info_keys().
+
+info_keys() -> ?INFO_KEYS.
+
+map(VHostPath, F) ->
+    %% TODO: there is scope for optimisation here, e.g. using a
+    %% cursor, parallelising the function invocation
+    lists:map(F, list(VHostPath)).
+
+infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items].
+
+%% Per-item accessors; unknown items are delegated to the type module,
+%% and anything it does not know either becomes {bad_argument, Item}.
+i(name,        #exchange{name        = Name})       -> Name;
+i(type,        #exchange{type        = Type})       -> Type;
+i(durable,     #exchange{durable     = Durable})    -> Durable;
+i(auto_delete, #exchange{auto_delete = AutoDelete}) -> AutoDelete;
+i(internal,    #exchange{internal    = Internal})   -> Internal;
+i(arguments,   #exchange{arguments   = Arguments})  -> Arguments;
+i(policy,      X) ->  case rabbit_policy:name(X) of
+                          none   -> '';
+                          Policy -> Policy
+                      end;
+i(user_who_performed_action, #exchange{options = Opts}) ->
+    maps:get(user, Opts, ?UNKNOWN_USER);
+i(Item, #exchange{type = Type} = X) ->
+    case (type_to_module(Type)):info(X, [Item]) of
+        [{Item, I}] -> I;
+        []          -> throw({bad_argument, Item})
+    end.
+
+-spec info(rabbit_types:exchange()) -> rabbit_types:infos().
+
+info(X = #exchange{type = Type}) ->
+    infos(?INFO_KEYS, X) ++ (type_to_module(Type)):info(X).
+
+-spec info
+        (rabbit_types:exchange(), rabbit_types:info_keys())
+        -> rabbit_types:infos().
+
+info(X = #exchange{type = _Type}, Items) ->
+    infos(Items, X).
+
+-spec info_all(rabbit_types:vhost()) -> [rabbit_types:infos()].
+
+info_all(VHostPath) -> map(VHostPath, fun (X) -> info(X) end).
+
+-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys())
+              -> [rabbit_types:infos()].
+
+info_all(VHostPath, Items) -> map(VHostPath, fun (X) -> info(X, Items) end).
+
+-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys(),
+                    reference(), pid())
+                   -> 'ok'.
+
+%% Streams per-exchange infos to an aggregator process (CLI support).
+info_all(VHostPath, Items, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map(
+      AggregatorPid, Ref, fun(X) -> info(X, Items) end, list(VHostPath)).
+
+-spec route(rabbit_types:exchange(), rabbit_types:delivery())
+           -> [rabbit_amqqueue:name()].
+
+%% Compute the set of queue names a delivery should be routed to.
+%% The default exchange (empty name) is special-cased: routing keys are
+%% either virtual direct-reply-to queues (delivered immediately via
+%% rabbit_channel:deliver_reply/2) or interpreted directly as queue names.
+%% All other exchanges go through route1/3 with any 'route' decorators.
+route(#exchange{name = #resource{virtual_host = VHost, name = RName} = XName,
+                decorators = Decorators} = X,
+      #delivery{message = #basic_message{routing_keys = RKs}} = Delivery) ->
+    case RName of
+        <<>> ->
+            RKsSorted = lists:usort(RKs),
+            [rabbit_channel:deliver_reply(RK, Delivery) ||
+                RK <- RKsSorted, virtual_reply_queue(RK)],
+            [rabbit_misc:r(VHost, queue, RK) || RK <- RKsSorted,
+                                                not virtual_reply_queue(RK)];
+        _ ->
+            Decs = rabbit_exchange_decorator:select(route, Decorators),
+            %% usort also deduplicates destinations collected by route1.
+            lists:usort(route1(Delivery, Decs, {[X], XName, []}))
+    end.
+
+%% Direct-reply-to pseudo queues are identified purely by name prefix.
+virtual_reply_queue(<<"amq.rabbitmq.reply-to.", _/binary>>) -> true;
+virtual_reply_queue(_)                                      -> false.
+
+%% Breadth-first traversal over exchange-to-exchange bindings. The
+%% accumulator is {WorkList, SeenXs, QNames}: exchanges still to route
+%% through, exchanges already visited (see process_route/2), and the
+%% queue names collected so far.
+route1(_, _, {[], _, QNames}) ->
+    QNames;
+route1(Delivery, Decorators,
+       {[X = #exchange{type = Type} | WorkList], SeenXs, QNames}) ->
+    ExchangeDests  = (type_to_module(Type)):route(X, Delivery),
+    DecorateDests  = process_decorators(X, Decorators, Delivery),
+    AlternateDests = process_alternate(X, ExchangeDests),
+    route1(Delivery, Decorators,
+           lists:foldl(fun process_route/2, {WorkList, SeenXs, QNames},
+                       AlternateDests ++ DecorateDests ++ ExchangeDests)).
+
+%% If the type module routed to nothing, fall back to the exchange's
+%% alternate-exchange (argument or policy), if one is configured.
+process_alternate(X = #exchange{name = XName}, []) ->
+    case rabbit_policy:get_arg(
+           <<"alternate-exchange">>, <<"alternate-exchange">>, X) of
+        undefined -> [];
+        AName     -> [rabbit_misc:r(XName, exchange, AName)]
+    end;
+process_alternate(_X, _Results) ->
+    [].
+
+%% Collect extra destinations contributed by active route decorators.
+process_decorators(_, [], _) -> %% optimisation
+    [];
+process_decorators(X, Decorators, Delivery) ->
+    lists:append([Decorator:route(X, Delivery) || Decorator <- Decorators]).
+
+%% Fold one routed destination into the {WorkList, SeenXs, QNames}
+%% accumulator. SeenXs starts out as the single source exchange name and
+%% is upgraded to a gb_set the first time a second exchange is seen;
+%% this avoids the set cost for the common no-e2e-binding case.
+process_route(#resource{kind = exchange} = XName,
+              {_WorkList, XName, _QNames} = Acc) ->
+    %% Routed back to the exchange we started from: already seen.
+    Acc;
+process_route(#resource{kind = exchange} = XName,
+              {WorkList, #resource{kind = exchange} = SeenX, QNames}) ->
+    %% Second distinct exchange: promote SeenXs to a gb_set.
+    {cons_if_present(XName, WorkList),
+     gb_sets:from_list([SeenX, XName]), QNames};
+process_route(#resource{kind = exchange} = XName,
+              {WorkList, SeenXs, QNames} = Acc) ->
+    case gb_sets:is_element(XName, SeenXs) of
+        true  -> Acc;
+        false -> {cons_if_present(XName, WorkList),
+                  gb_sets:add_element(XName, SeenXs), QNames}
+    end;
+process_route(#resource{kind = queue} = QName,
+              {WorkList, SeenXs, QNames}) ->
+    {WorkList, SeenXs, [QName | QNames]}.
+
+%% Prepend the exchange record for XName if it still exists; bindings to
+%% deleted exchanges are simply skipped.
+cons_if_present(XName, L) ->
+    case lookup(XName) of
+        {ok, X}            -> [X | L];
+        {error, not_found} -> L
+    end.
+
+%% Run Fun(X) inside an mnesia transaction-with-tail, or return a
+%% constant {error, not_found} thunk if the exchange does not exist.
+call_with_exchange(XName, Fun) ->
+    rabbit_misc:execute_mnesia_tx_with_tail(
+      fun () -> case mnesia:read({rabbit_exchange, XName}) of
+                    []  -> rabbit_misc:const({error, not_found});
+                    [X] -> Fun(X)
+                end
+      end).
+
+-spec delete
+        (name(), 'true', rabbit_types:username()) ->
+            'ok'| rabbit_types:error('not_found' | 'in_use');
+        (name(), 'false', rabbit_types:username()) ->
+            'ok' | rabbit_types:error('not_found').
+
+%% Delete an exchange. With IfUnused = true the delete is refused
+%% ({error, in_use}) while the exchange still has bindings from it.
+%% Binding deletions are post-processed (notifications etc.) via
+%% rabbit_binding:process_deletions/2.
+delete(XName, IfUnused, Username) ->
+    Fun = case IfUnused of
+              true  -> fun conditional_delete/2;
+              false -> fun unconditional_delete/2
+          end,
+    try
+        %% guard exchange.declare operations from failing when there's
+        %% a race condition between it and an exchange.delete.
+        %%
+        %% see rabbitmq/rabbitmq-federation#7
+        rabbit_runtime_parameters:set(XName#resource.virtual_host,
+                                      ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT,
+                                      XName#resource.name, true, Username),
+        call_with_exchange(
+          XName,
+          fun (X) ->
+                  case Fun(X, false) of
+                      {deleted, X, Bs, Deletions} ->
+                          rabbit_binding:process_deletions(
+                            rabbit_binding:add_deletion(
+                              XName, {X, deleted, Bs}, Deletions), Username);
+                      {error, _InUseOrNotFound} = E ->
+                          rabbit_misc:const(E)
+                  end
+          end)
+    after
+        %% Always clear the in-progress marker, even on error.
+        rabbit_runtime_parameters:clear(XName#resource.virtual_host,
+                                        ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT,
+                                        XName#resource.name, Username)
+    end.
+
+-spec validate_binding
+        (rabbit_types:exchange(), rabbit_types:binding())
+        -> rabbit_types:ok_or_error({'binding_invalid', string(), [any()]}).
+
+%% Delegate binding validation to the exchange type module.
+validate_binding(X = #exchange{type = XType}, Binding) ->
+    Module = type_to_module(XType),
+    Module:validate_binding(X, Binding).
+
+-spec maybe_auto_delete
+        (rabbit_types:exchange(), boolean())
+        -> 'not_deleted' | {'deleted', rabbit_binding:deletions()}.
+
+%% Delete an auto-delete exchange once it has become unused. A non
+%% auto-delete exchange, or one still in use, is left alone. Note the
+%% {deleted, X, [], Deletions} match asserts there were no bindings
+%% from the exchange (it was unused).
+maybe_auto_delete(#exchange{auto_delete = false}, _OnlyDurable) ->
+    not_deleted;
+maybe_auto_delete(#exchange{auto_delete = true} = X, OnlyDurable) ->
+    case conditional_delete(X, OnlyDurable) of
+        {error, in_use}             -> not_deleted;
+        {deleted, X, [], Deletions} -> {deleted, Deletions}
+    end.
+
+%% Delete only if the exchange has no bindings originating from it.
+conditional_delete(X = #exchange{name = XName}, OnlyDurable) ->
+    case rabbit_binding:has_for_source(XName) of
+        false -> internal_delete(X, OnlyDurable, false);
+        true  -> {error, in_use}
+    end.
+
+unconditional_delete(X, OnlyDurable) ->
+    internal_delete(X, OnlyDurable, true).
+
+%% Remove the exchange from all three tables (ram, serial, durable) and
+%% tear down its bindings. Returns {deleted, X, Bindings, Deletions}
+%% where Bindings are the removed source-side bindings (if requested)
+%% and Deletions come from removing destination-side bindings.
+internal_delete(X = #exchange{name = XName}, OnlyDurable, RemoveBindingsForSource) ->
+    ok = mnesia:delete({rabbit_exchange, XName}),
+    ok = mnesia:delete({rabbit_exchange_serial, XName}),
+    %% NOTE(review): unlike the two deletes above, this result is not
+    %% matched against 'ok' — presumably intentional, but worth confirming.
+    mnesia:delete({rabbit_durable_exchange, XName}),
+    Bindings = case RemoveBindingsForSource of
+                   true  -> rabbit_binding:remove_for_source(XName);
+                   false -> []
+               end,
+    {deleted, X, Bindings, rabbit_binding:remove_for_destination(
+                             XName, OnlyDurable)}.
+
+%% Return the current serial for the exchange and persist the increment.
+%% Must run inside an mnesia transaction (peek takes a write lock).
+next_serial(XName) ->
+    Serial = peek_serial(XName, write),
+    ok = mnesia:write(rabbit_exchange_serial,
+                      #exchange_serial{name = XName, next = Serial + 1}, write),
+    Serial.
+
+-spec peek_serial(name()) -> pos_integer() | 'undefined'.
+
+peek_serial(XName) -> peek_serial(XName, read).
+
+%% Serials start at 1 when no record exists yet.
+peek_serial(XName, LockType) ->
+    case mnesia:read(rabbit_exchange_serial, XName, LockType) of
+        [#exchange_serial{next = Serial}]  -> Serial;
+        _                                  -> 1
+    end.
+
+%% Fallback when an exchange type is not registered: warn once per
+%% process (the result is cached in the process dictionary) and use
+%% rabbit_exchange_type_invalid, whose route/2 raises a protocol error.
+invalid_module(T) ->
+    rabbit_log:warning("Could not find exchange type ~s.~n", [T]),
+    put({xtype_to_module, T}, rabbit_exchange_type_invalid),
+    rabbit_exchange_type_invalid.
+
+%% Used with atoms from records; e.g., the type is expected to exist.
+%% Resolves an exchange type atom to its implementing module, caching
+%% the lookup in the process dictionary.
+type_to_module(T) ->
+    case get({xtype_to_module, T}) of
+        undefined ->
+            case rabbit_registry:lookup_module(exchange, T) of
+                {ok, Module}       -> put({xtype_to_module, T}, Module),
+                                      Module;
+                {error, not_found} -> invalid_module(T)
+            end;
+        Module ->
+            Module
+    end.
diff --git a/deps/rabbit/src/rabbit_exchange_decorator.erl b/deps/rabbit/src/rabbit_exchange_decorator.erl
new file mode 100644
index 0000000000..02d0258d3c
--- /dev/null
+++ b/deps/rabbit/src/rabbit_exchange_decorator.erl
@@ -0,0 +1,105 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_exchange_decorator).
+
+-include("rabbit.hrl").
+
+-export([select/2, set/1]).
+
+-behaviour(rabbit_registry_class).
+
+-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).
+
+%% This is like an exchange type except that:
+%%
+%% 1) It applies to all exchanges as soon as it is installed, therefore
+%% 2) It is not allowed to affect validation, so no validate/1 or
+%%    assert_args_equivalence/2
+%%
+%% It's possible in the future we might make decorators
+%% able to manipulate messages as they are published.
+
+-type(tx() :: 'transaction' | 'none').
+-type(serial() :: pos_integer() | tx()).
+
+-callback description() -> [proplists:property()].
+
+%% Should Rabbit ensure that all binding events that are
+%% delivered to an individual exchange can be serialised? (they
+%% might still be delivered out of order, but there'll be a
+%% serial number).
+-callback serialise_events(rabbit_types:exchange()) -> boolean().
+
+%% called after declaration and recovery
+-callback create(tx(), rabbit_types:exchange()) -> 'ok'.
+
+%% called after exchange (auto)deletion.
+-callback delete(tx(), rabbit_types:exchange(), [rabbit_types:binding()]) ->
+    'ok'.
+
+%% called when the policy attached to this exchange changes.
+-callback policy_changed(rabbit_types:exchange(), rabbit_types:exchange()) ->
+    'ok'.
+
+%% called after a binding has been added or recovered
+-callback add_binding(serial(), rabbit_types:exchange(),
+                      rabbit_types:binding()) -> 'ok'.
+
+%% called after bindings have been deleted.
+-callback remove_bindings(serial(), rabbit_types:exchange(),
+                          [rabbit_types:binding()]) -> 'ok'.
+
+%% Allows additional destinations to be added to the routing decision.
+-callback route(rabbit_types:exchange(), rabbit_types:delivery()) ->
+    [rabbit_amqqueue:name() | rabbit_exchange:name()].
+
+%% Whether the decorator wishes to receive callbacks for the exchange
+%% none:no callbacks, noroute:all callbacks except route, all:all callbacks
+-callback active_for(rabbit_types:exchange()) -> 'none' | 'noroute' | 'all'.
+
+%%----------------------------------------------------------------------------
+
+%% Registry callbacks: any change to the installed decorator set may
+%% change which decorators are active on each exchange, so re-evaluate
+%% every exchange.
+added_to_rabbit_registry(_Type, _ModuleName) ->
+    [maybe_recover(X) || X <- rabbit_exchange:list()],
+    ok.
+removed_from_rabbit_registry(_Type) ->
+    [maybe_recover(X) || X <- rabbit_exchange:list()],
+    ok.
+
+%% select a subset of active decorators
+select(all,   {Route, NoRoute})  -> filter(Route ++ NoRoute);
+select(route, {Route, _NoRoute}) -> filter(Route);
+select(raw,   {Route, NoRoute})  -> Route ++ NoRoute.
+
+%% Drop decorator modules whose code is no longer loadable.
+filter(Modules) ->
+    [M || M <- Modules, code:which(M) =/= non_existing].
+
+%% Recompute the exchange's decorator field as a {Route, NoRoute} pair
+%% based on each registered decorator's active_for/1 answer.
+set(X) ->
+    Decs = lists:foldl(fun (D, {Route, NoRoute}) ->
+                               ActiveFor = D:active_for(X),
+                               {cons_if_eq(all,     ActiveFor, D, Route),
+                                cons_if_eq(noroute, ActiveFor, D, NoRoute)}
+                       end, {[], []}, list()),
+    X#exchange{decorators = Decs}.
+
+list() -> [M || {_, M} <- rabbit_registry:lookup_all(exchange_decorator)].
+
+cons_if_eq(Select,  Select,   Item,  List) -> [Item | List];
+cons_if_eq(_Select, _Other,  _Item,  List) -> List.
+
+%% If the active decorator set for X has changed, run create/2 for any
+%% newly-active decorators and refresh the stored record.
+maybe_recover(X = #exchange{name = Name,
+                            decorators = Decs}) ->
+    #exchange{decorators = Decs1} = set(X),
+    Old = lists:sort(select(all, Decs)),
+    New = lists:sort(select(all, Decs1)),
+    case New of
+        Old -> ok;
+        _   -> %% TODO create a tx here for non-federation decorators
+               [M:create(none, X) || M <- New -- Old],
+               rabbit_exchange:update_decorators(Name)
+    end.
diff --git a/deps/rabbit/src/rabbit_exchange_parameters.erl b/deps/rabbit/src/rabbit_exchange_parameters.erl
new file mode 100644
index 0000000000..f9de648cfa
--- /dev/null
+++ b/deps/rabbit/src/rabbit_exchange_parameters.erl
@@ -0,0 +1,39 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_exchange_parameters).
+
+%% Runtime parameter component used as a marker that an exchange delete
+%% is in progress (see rabbit_exchange:delete/3). All callbacks accept
+%% unconditionally; the parameter carries no validated content.
+
+-behaviour(rabbit_runtime_parameter).
+
+-include("rabbit.hrl").
+
+-export([register/0]).
+-export([validate/5, notify/5, notify_clear/4]).
+
+-rabbit_boot_step({?MODULE,
+                   [{description, "exchange parameters"},
+                    {mfa, {rabbit_exchange_parameters, register, []}},
+                    {requires, rabbit_registry},
+                    {enables, recovery}]}).
+
+register() ->
+    rabbit_registry:register(runtime_parameter,
+                             ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, ?MODULE),
+    %% ensure there are no leftovers from before node restart/crash
+    rabbit_runtime_parameters:clear_component(
+      ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT,
+      ?INTERNAL_USER),
+    ok.
+
+validate(_VHost, ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, _Name, _Term, _User) ->
+    ok.
+
+notify(_VHost, ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, _Name, _Term, _Username) ->
+    ok.
+
+notify_clear(_VHost, ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, _Name, _Username) ->
+    ok.
diff --git a/deps/rabbit/src/rabbit_exchange_type_direct.erl b/deps/rabbit/src/rabbit_exchange_type_direct.erl
new file mode 100644
index 0000000000..3f4350e7b0
--- /dev/null
+++ b/deps/rabbit/src/rabbit_exchange_type_direct.erl
@@ -0,0 +1,46 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_exchange_type_direct).
+-include("rabbit.hrl").
+
+%% The AMQP direct exchange type: routes to bindings whose key exactly
+%% matches one of the message's routing keys. All lifecycle callbacks
+%% are no-ops; state lives entirely in the routing tables.
+
+-behaviour(rabbit_exchange_type).
+
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, validate_binding/2,
+         create/2, delete/3, policy_changed/2, add_binding/3,
+         remove_bindings/3, assert_args_equivalence/2]).
+-export([info/1, info/2]).
+
+-rabbit_boot_step({?MODULE,
+                   [{description, "exchange type direct"},
+                    {mfa,         {rabbit_registry, register,
+                                   [exchange, <<"direct">>, ?MODULE]}},
+                    {requires,    rabbit_registry},
+                    {enables,     kernel_ready}]}).
+
+info(_X) -> [].
+info(_X, _) -> [].
+
+description() ->
+    [{description, <<"AMQP direct exchange, as per the AMQP specification">>}].
+
+serialise_events() -> false.
+
+%% Exact routing-key match via the shared router.
+route(#exchange{name = Name},
+      #delivery{message = #basic_message{routing_keys = Routes}}) ->
+    rabbit_router:match_routing_key(Name, Routes).
+
+validate(_X) -> ok.
+validate_binding(_X, _B) -> ok.
+create(_Tx, _X) -> ok.
+delete(_Tx, _X, _Bs) -> ok.
+policy_changed(_X1, _X2) -> ok.
+add_binding(_Tx, _X, _B) -> ok.
+remove_bindings(_Tx, _X, _Bs) -> ok.
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
diff --git a/deps/rabbit/src/rabbit_exchange_type_fanout.erl b/deps/rabbit/src/rabbit_exchange_type_fanout.erl
new file mode 100644
index 0000000000..a8778cf0c7
--- /dev/null
+++ b/deps/rabbit/src/rabbit_exchange_type_fanout.erl
@@ -0,0 +1,45 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_exchange_type_fanout).
+-include("rabbit.hrl").
+
+%% The AMQP fanout exchange type: routes to every binding regardless of
+%% routing key. All lifecycle callbacks are no-ops.
+
+-behaviour(rabbit_exchange_type).
+
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, validate_binding/2,
+         create/2, delete/3, policy_changed/2, add_binding/3,
+         remove_bindings/3, assert_args_equivalence/2]).
+-export([info/1, info/2]).
+
+-rabbit_boot_step({?MODULE,
+                   [{description, "exchange type fanout"},
+                    {mfa,         {rabbit_registry, register,
+                                   [exchange, <<"fanout">>, ?MODULE]}},
+                    {requires,    rabbit_registry},
+                    {enables,     kernel_ready}]}).
+
+info(_X) -> [].
+info(_X, _) -> [].
+
+description() ->
+    [{description, <<"AMQP fanout exchange, as per the AMQP specification">>}].
+
+serialise_events() -> false.
+
+%% '_' is the wildcard key fanout bindings are stored under, so this
+%% matches every binding of the exchange.
+route(#exchange{name = Name}, _Delivery) ->
+    rabbit_router:match_routing_key(Name, ['_']).
+
+validate(_X) -> ok.
+validate_binding(_X, _B) -> ok.
+create(_Tx, _X) -> ok.
+delete(_Tx, _X, _Bs) -> ok.
+policy_changed(_X1, _X2) -> ok.
+add_binding(_Tx, _X, _B) -> ok.
+remove_bindings(_Tx, _X, _Bs) -> ok.
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
diff --git a/deps/rabbit/src/rabbit_exchange_type_headers.erl b/deps/rabbit/src/rabbit_exchange_type_headers.erl
new file mode 100644
index 0000000000..e40195de7a
--- /dev/null
+++ b/deps/rabbit/src/rabbit_exchange_type_headers.erl
@@ -0,0 +1,136 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_exchange_type_headers).
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+
+%% The AMQP headers exchange type: routes on message header fields
+%% matched against binding arguments, with "x-match" selecting all/any
+%% semantics.
+
+-behaviour(rabbit_exchange_type).
+
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, validate_binding/2,
+         create/2, delete/3, policy_changed/2, add_binding/3,
+         remove_bindings/3, assert_args_equivalence/2]).
+-export([info/1, info/2]).
+
+-rabbit_boot_step({?MODULE,
+                   [{description, "exchange type headers"},
+                    {mfa,         {rabbit_registry, register,
+                                   [exchange, <<"headers">>, ?MODULE]}},
+                    {requires,    rabbit_registry},
+                    {enables,     kernel_ready}]}).
+
+info(_X) -> [].
+info(_X, _) -> [].
+
+description() ->
+    [{description, <<"AMQP headers exchange, as per the AMQP specification">>}].
+
+serialise_events() -> false.
+
+%% Sort the message headers once, then test each binding's argument
+%% table against them (both sides must be key-sorted; see below).
+route(#exchange{name = Name},
+      #delivery{message = #basic_message{content = Content}}) ->
+    Headers = case (Content#content.properties)#'P_basic'.headers of
+                  undefined -> [];
+                  H         -> rabbit_misc:sort_field_table(H)
+              end,
+    rabbit_router:match_bindings(
+      Name, fun (#binding{args = Spec}) -> headers_match(Spec, Headers) end).
+
+%% "x-match" must be the longstr "all" or "any" (or absent, see [0]).
+validate_binding(_X, #binding{args = Args}) ->
+    case rabbit_misc:table_lookup(Args, <<"x-match">>) of
+        {longstr, <<"all">>} -> ok;
+        {longstr, <<"any">>} -> ok;
+        {longstr, Other}     -> {error,
+                                 {binding_invalid,
+                                  "Invalid x-match field value ~p; "
+                                  "expected all or any", [Other]}};
+        {Type,    Other}     -> {error,
+                                 {binding_invalid,
+                                  "Invalid x-match field type ~p (value ~p); "
+                                  "expected longstr", [Type, Other]}};
+        undefined            -> ok %% [0]
+    end.
+%% [0] spec is vague on whether it can be omitted but in practice it's
+%% useful to allow people to do this
+
+parse_x_match({longstr, <<"all">>}) -> all;
+parse_x_match({longstr, <<"any">>}) -> any;
+parse_x_match(_)                    -> all. %% legacy; we didn't validate
+
+%% Horrendous matching algorithm. Depends for its merge-like
+%% (linear-time) behaviour on the lists:keysort
+%% (rabbit_misc:sort_field_table) that route/1 and
+%% rabbit_binding:{add,remove}/2 do.
+%%
+%%                 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+%% In other words: REQUIRES BOTH PATTERN AND DATA TO BE SORTED ASCENDING BY KEY.
+%%                 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+%%
+
+-spec headers_match
+        (rabbit_framing:amqp_table(), rabbit_framing:amqp_table()) ->
+            boolean().
+
+headers_match(Args, Data) ->
+    MK = parse_x_match(rabbit_misc:table_lookup(Args, <<"x-match">>)),
+    headers_match(Args, Data, true, false, MK).
+
+% A bit less horrendous algorithm :)
+% State is (Pattern, Data, AllMatch, AnyMatch, MatchKind); short-circuit
+% as soon as the outcome for the given MatchKind is decided.
+headers_match(_, _, false, _, all) -> false;
+headers_match(_, _, _, true, any) -> true;
+
+% No more bindings, return current state
+headers_match([], _Data, AllMatch, _AnyMatch, all) -> AllMatch;
+headers_match([], _Data, _AllMatch, AnyMatch, any) -> AnyMatch;
+
+% Delete bindings starting with x-
+headers_match([{<<"x-", _/binary>>, _PT, _PV} | PRest], Data,
+              AllMatch, AnyMatch, MatchKind) ->
+    headers_match(PRest, Data, AllMatch, AnyMatch, MatchKind);
+
+% No more data, but still bindings, false with all
+headers_match(_Pattern, [], _AllMatch, AnyMatch, MatchKind) ->
+    headers_match([], [], false, AnyMatch, MatchKind);
+
+% Data key header not in binding, go next data
+headers_match(Pattern = [{PK, _PT, _PV} | _], [{DK, _DT, _DV} | DRest],
+              AllMatch, AnyMatch, MatchKind) when PK > DK ->
+    headers_match(Pattern, DRest, AllMatch, AnyMatch, MatchKind);
+
+% Binding key header not in data, false with all, go next binding
+headers_match([{PK, _PT, _PV} | PRest], Data = [{DK, _DT, _DV} | _],
+              _AllMatch, AnyMatch, MatchKind) when PK < DK ->
+    headers_match(PRest, Data, false, AnyMatch, MatchKind);
+
+%% It's not properly specified, but a "no value" in a
+%% pattern field is supposed to mean simple presence of
+%% the corresponding data field. I've interpreted that to
+%% mean a type of "void" for the pattern field.
+headers_match([{PK, void, _PV} | PRest], [{DK, _DT, _DV} | DRest],
+              AllMatch, _AnyMatch, MatchKind) when PK == DK ->
+    headers_match(PRest, DRest, AllMatch, true, MatchKind);
+
+% Complete match, true with any, go next
+headers_match([{PK, _PT, PV} | PRest], [{DK, _DT, DV} | DRest],
+              AllMatch, _AnyMatch, MatchKind) when PK == DK andalso PV == DV ->
+    headers_match(PRest, DRest, AllMatch, true, MatchKind);
+
+% Value does not match, false with all, go next
+headers_match([{PK, _PT, _PV} | PRest], [{DK, _DT, _DV} | DRest],
+              _AllMatch, AnyMatch, MatchKind) when PK == DK ->
+    headers_match(PRest, DRest, false, AnyMatch, MatchKind).
+
+
+validate(_X) -> ok.
+create(_Tx, _X) -> ok.
+delete(_Tx, _X, _Bs) -> ok.
+policy_changed(_X1, _X2) -> ok.
+add_binding(_Tx, _X, _B) -> ok.
+remove_bindings(_Tx, _X, _Bs) -> ok.
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
diff --git a/deps/rabbit/src/rabbit_exchange_type_invalid.erl b/deps/rabbit/src/rabbit_exchange_type_invalid.erl
new file mode 100644
index 0000000000..3fa27d28e9
--- /dev/null
+++ b/deps/rabbit/src/rabbit_exchange_type_invalid.erl
@@ -0,0 +1,45 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_exchange_type_invalid).
+-include("rabbit.hrl").
+
+%% Placeholder exchange type installed by rabbit_exchange when the real
+%% type module cannot be found; routing through it raises a protocol
+%% error, while all other callbacks succeed as no-ops.
+
+-behaviour(rabbit_exchange_type).
+
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, validate_binding/2,
+         create/2, delete/3, policy_changed/2, add_binding/3,
+         remove_bindings/3, assert_args_equivalence/2]).
+-export([info/1, info/2]).
+
+info(_X) -> [].
+info(_X, _) -> [].
+
+description() ->
+    [{description,
+      <<"Dummy exchange type, to be used when the intended one is not found.">>
+     }].
+
+serialise_events() -> false.
+
+-spec route(rabbit_types:exchange(), rabbit_types:delivery()) -> no_return().
+
+%% Always fails with precondition_failed.
+route(#exchange{name = Name, type = Type}, _) ->
+    rabbit_misc:protocol_error(
+      precondition_failed,
+      "Cannot route message through ~s: exchange type ~s not found",
+      [rabbit_misc:rs(Name), Type]).
+
+validate(_X) -> ok.
+validate_binding(_X, _B) -> ok.
+create(_Tx, _X) -> ok.
+delete(_Tx, _X, _Bs) -> ok.
+policy_changed(_X1, _X2) -> ok.
+add_binding(_Tx, _X, _B) -> ok.
+remove_bindings(_Tx, _X, _Bs) -> ok.
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
diff --git a/deps/rabbit/src/rabbit_exchange_type_topic.erl b/deps/rabbit/src/rabbit_exchange_type_topic.erl
new file mode 100644
index 0000000000..38b05895f2
--- /dev/null
+++ b/deps/rabbit/src/rabbit_exchange_type_topic.erl
@@ -0,0 +1,266 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_exchange_type_topic).
+
+-include("rabbit.hrl").
+
+%% The AMQP topic exchange type. Bindings are stored as a trie keyed by
+%% the dot-separated words of the binding key, held in three mnesia
+%% tables: rabbit_topic_trie_node (node counters), rabbit_topic_trie_edge
+%% (word-labelled edges) and rabbit_topic_trie_binding (destinations).
+
+-behaviour(rabbit_exchange_type).
+
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, validate_binding/2,
+         create/2, delete/3, policy_changed/2, add_binding/3,
+         remove_bindings/3, assert_args_equivalence/2]).
+-export([info/1, info/2]).
+
+-rabbit_boot_step({?MODULE,
+                   [{description, "exchange type topic"},
+                    {mfa,         {rabbit_registry, register,
+                                   [exchange, <<"topic">>, ?MODULE]}},
+                    {requires,    rabbit_registry},
+                    {enables,     kernel_ready}]}).
+
+%%----------------------------------------------------------------------------
+
+info(_X) -> [].
+info(_X, _) -> [].
+
+description() ->
+    [{description, <<"AMQP topic exchange, as per the AMQP specification">>}].
+
+serialise_events() -> false.
+
+%% NB: This may return duplicate results in some situations (that's ok)
+route(#exchange{name = X},
+      #delivery{message = #basic_message{routing_keys = Routes}}) ->
+    lists:append([begin
+                      Words = split_topic_key(RKey),
+                      mnesia:async_dirty(fun trie_match/2, [X, Words])
+                  end || RKey <- Routes]).
+
+validate(_X) -> ok.
+validate_binding(_X, _B) -> ok.
+create(_Tx, _X) -> ok.
+
+%% Deleting the exchange drops its entire trie (transactional phase only).
+delete(transaction, #exchange{name = X}, _Bs) ->
+    trie_remove_all_nodes(X),
+    trie_remove_all_edges(X),
+    trie_remove_all_bindings(X),
+    ok;
+delete(none, _Exchange, _Bs) ->
+    ok.
+
+policy_changed(_X1, _X2) -> ok.
+
+add_binding(transaction, _Exchange, Binding) ->
+    internal_add_binding(Binding);
+add_binding(none, _Exchange, _Binding) ->
+    ok.
+
+%% Remove each binding's leaf entry and prune now-empty trie nodes
+%% back up towards the root.
+remove_bindings(transaction, _X, Bs) ->
+    %% See rabbit_binding:lock_route_tables for the rationale for
+    %% taking table locks.
+    case Bs of
+        [_] -> ok;
+        _   -> [mnesia:lock({table, T}, write) ||
+                   T <- [rabbit_topic_trie_node,
+                         rabbit_topic_trie_edge,
+                         rabbit_topic_trie_binding]]
+    end,
+    [case follow_down_get_path(X, split_topic_key(K)) of
+         {ok, Path = [{FinalNode, _} | _]} ->
+             trie_remove_binding(X, FinalNode, D, Args),
+             remove_path_if_empty(X, Path);
+         {error, _Node, _RestW} ->
+             %% We're trying to remove a binding that no longer exists.
+             %% That's unexpected, but shouldn't be a problem.
+             ok
+     end ||  #binding{source = X, key = K, destination = D, args = Args} <- Bs],
+    ok;
+remove_bindings(none, _X, _Bs) ->
+    ok.
+
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
+
+%%----------------------------------------------------------------------------
+
+%% Create (or extend) the trie path for the binding key, then attach the
+%% destination to the final node.
+internal_add_binding(#binding{source = X, key = K, destination = D,
+                              args = Args}) ->
+    FinalNode = follow_down_create(X, split_topic_key(K)),
+    trie_add_binding(X, FinalNode, D, Args),
+    ok.
+
+trie_match(X, Words) ->
+    trie_match(X, root, Words, []).
+
+%% Walk the trie matching Words, trying at each node the literal word,
+%% "*" (one word) and "#" (zero or more words). At the end of the key a
+%% trailing "#" edge also matches.
+trie_match(X, Node, [], ResAcc) ->
+    trie_match_part(X, Node, "#", fun trie_match_skip_any/4, [],
+                    trie_bindings(X, Node) ++ ResAcc);
+trie_match(X, Node, [W | RestW] = Words, ResAcc) ->
+    lists:foldl(fun ({WArg, MatchFun, RestWArg}, Acc) ->
+                        trie_match_part(X, Node, WArg, MatchFun, RestWArg, Acc)
+                end, ResAcc, [{W, fun trie_match/4, RestW},
+                              {"*", fun trie_match/4, RestW},
+                              {"#", fun trie_match_skip_any/4, Words}]).
+
+%% Follow the Search edge from Node, if present, and continue with
+%% MatchFun; otherwise keep the accumulator unchanged.
+trie_match_part(X, Node, Search, MatchFun, RestW, ResAcc) ->
+    case trie_child(X, Node, Search) of
+        {ok, NextNode} -> MatchFun(X, NextNode, RestW, ResAcc);
+        error          -> ResAcc
+    end.
+
+%% "#" matches zero or more words: try consuming nothing, one word,
+%% two words, ... at this node.
+trie_match_skip_any(X, Node, [], ResAcc) ->
+    trie_match(X, Node, [], ResAcc);
+trie_match_skip_any(X, Node, [_ | RestW] = Words, ResAcc) ->
+    trie_match_skip_any(X, Node, RestW,
+                        trie_match(X, Node, Words, ResAcc)).
+
+%% Descend as far as existing edges allow, then create fresh nodes and
+%% edges for the remaining words; returns the final node id.
+follow_down_create(X, Words) ->
+    case follow_down_last_node(X, Words) of
+        {ok, FinalNode}      -> FinalNode;
+        {error, Node, RestW} -> lists:foldl(
+                                  fun (W, CurNode) ->
+                                          NewNode = new_node_id(),
+                                          trie_add_edge(X, CurNode, NewNode, W),
+                                          NewNode
+                                  end, Node, RestW)
+    end.
+
+follow_down_last_node(X, Words) ->
+    follow_down(X, fun (_, Node, _) -> Node end, root, Words).
+
+%% Like follow_down_last_node/2 but accumulates the whole path in
+%% reverse (leaf first), for pruning on binding removal.
+follow_down_get_path(X, Words) ->
+    follow_down(X, fun (W, Node, PathAcc) -> [{Node, W} | PathAcc] end,
+                [{root, none}], Words).
+
+follow_down(X, AccFun, Acc0, Words) ->
+    follow_down(X, root, AccFun, Acc0, Words).
+
+%% Generic descent: fold AccFun over the nodes reached by following the
+%% words' edges; stops with {error, Acc, RemainingWords} on a dead end.
+follow_down(_X, _CurNode, _AccFun, Acc, []) ->
+    {ok, Acc};
+follow_down(X, CurNode, AccFun, Acc, Words = [W | RestW]) ->
+    case trie_child(X, CurNode, W) of
+        {ok, NextNode} -> follow_down(X, NextNode, AccFun,
+                                      AccFun(W, NextNode, Acc), RestW);
+        error          -> {error, Acc, Words}
+    end.
+
+%% Walk the (leaf-first) path upwards, deleting the edge into each node
+%% whose counter record has disappeared (i.e. it has no edges/bindings
+%% left); stop at the first still-populated node or at the root.
+remove_path_if_empty(_, [{root, none}]) ->
+    ok;
+remove_path_if_empty(X, [{Node, W} | [{Parent, _} | _] = RestPath]) ->
+    case mnesia:read(rabbit_topic_trie_node,
+                     #trie_node{exchange_name = X, node_id = Node}, write) of
+        [] -> trie_remove_edge(X, Parent, Node, W),
+              remove_path_if_empty(X, RestPath);
+        _  -> ok
+    end.
+
+%% Look up the child of Node reached via Word, if any.
+trie_child(X, Node, Word) ->
+    case mnesia:read({rabbit_topic_trie_edge,
+                      #trie_edge{exchange_name = X,
+                                 node_id       = Node,
+                                 word          = Word}}) of
+        [#topic_trie_edge{node_id = NextNode}] -> {ok, NextNode};
+        []                                     -> error
+    end.
+
+%% All destinations bound at Node (arguments ignored for routing).
+trie_bindings(X, Node) ->
+    MatchHead = #topic_trie_binding{
+      trie_binding = #trie_binding{exchange_name = X,
+                                   node_id       = Node,
+                                   destination   = '$1',
+                                   arguments     = '_'}},
+    mnesia:select(rabbit_topic_trie_binding, [{MatchHead, [], ['$1']}]).
+
+%% Adjust the edge_count or binding_count of a node record (Field is a
+%% record field index, hence setelement/element). The record is created
+%% on first use and deleted once both counters reach zero.
+trie_update_node_counts(X, Node, Field, Delta) ->
+    E = case mnesia:read(rabbit_topic_trie_node,
+                         #trie_node{exchange_name = X,
+                                    node_id       = Node}, write) of
+            []   -> #topic_trie_node{trie_node = #trie_node{
+                                       exchange_name = X,
+                                       node_id       = Node},
+                                     edge_count    = 0,
+                                     binding_count = 0};
+            [E0] -> E0
+        end,
+    case setelement(Field, E, element(Field, E) + Delta) of
+        #topic_trie_node{edge_count = 0, binding_count = 0} ->
+            ok = mnesia:delete_object(rabbit_topic_trie_node, E, write);
+        EN ->
+            ok = mnesia:write(rabbit_topic_trie_node, EN, write)
+    end.
+
+trie_add_edge(X, FromNode, ToNode, W) ->
+    trie_update_node_counts(X, FromNode, #topic_trie_node.edge_count, +1),
+    trie_edge_op(X, FromNode, ToNode, W, fun mnesia:write/3).
+
+trie_remove_edge(X, FromNode, ToNode, W) ->
+    trie_update_node_counts(X, FromNode, #topic_trie_node.edge_count, -1),
+    trie_edge_op(X, FromNode, ToNode, W, fun mnesia:delete_object/3).
+
+trie_edge_op(X, FromNode, ToNode, W, Op) ->
+    ok = Op(rabbit_topic_trie_edge,
+            #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X,
+                                                    node_id       = FromNode,
+                                                    word          = W},
+                             node_id   = ToNode},
+            write).
+
+trie_add_binding(X, Node, D, Args) ->
+    trie_update_node_counts(X, Node, #topic_trie_node.binding_count, +1),
+    trie_binding_op(X, Node, D, Args, fun mnesia:write/3).
+
+trie_remove_binding(X, Node, D, Args) ->
+    trie_update_node_counts(X, Node, #topic_trie_node.binding_count, -1),
+    trie_binding_op(X, Node, D, Args, fun mnesia:delete_object/3).
+
+trie_binding_op(X, Node, D, Args, Op) ->
+    ok = Op(rabbit_topic_trie_binding,
+            #topic_trie_binding{
+              trie_binding = #trie_binding{exchange_name = X,
+                                           node_id       = Node,
+                                           destination   = D,
+                                           arguments     = Args}},
+            write).
+
+trie_remove_all_nodes(X) ->
+    remove_all(rabbit_topic_trie_node,
+               #topic_trie_node{trie_node = #trie_node{exchange_name = X,
+                                                       _             = '_'},
+                                _         = '_'}).
+
+trie_remove_all_edges(X) ->
+    remove_all(rabbit_topic_trie_edge,
+               #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X,
+                                                       _             = '_'},
+                                _         = '_'}).
+
+trie_remove_all_bindings(X) ->
+    remove_all(rabbit_topic_trie_binding,
+               #topic_trie_binding{
+                 trie_binding = #trie_binding{exchange_name = X, _ = '_'},
+                 _            = '_'}).
+
+%% Delete every record in Table matching Pattern, under a write lock.
+remove_all(Table, Pattern) ->
+    lists:foreach(fun (R) -> mnesia:delete_object(Table, R, write) end,
+                  mnesia:match_object(Table, Pattern, write)).
+
+new_node_id() ->
+    rabbit_guid:gen().
+
+%% Split a binary routing key on "." into a list of words, each word a
+%% list of bytes; <<>> yields [].
+split_topic_key(Key) ->
+    split_topic_key(Key, [], []).
+
+split_topic_key(<<>>, [], []) ->
+    [];
+split_topic_key(<<>>, RevWordAcc, RevResAcc) ->
+    lists:reverse([lists:reverse(RevWordAcc) | RevResAcc]);
+split_topic_key(<<$., Rest/binary>>, RevWordAcc, RevResAcc) ->
+    split_topic_key(Rest, [], [lists:reverse(RevWordAcc) | RevResAcc]);
+split_topic_key(<<C:8, Rest/binary>>, RevWordAcc, RevResAcc) ->
+    split_topic_key(Rest, [C | RevWordAcc], RevResAcc).
diff --git a/deps/rabbit/src/rabbit_feature_flags.erl b/deps/rabbit/src/rabbit_feature_flags.erl
new file mode 100644
index 0000000000..921ec9ab53
--- /dev/null
+++ b/deps/rabbit/src/rabbit_feature_flags.erl
@@ -0,0 +1,2470 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% @author The RabbitMQ team
+%% @copyright 2018-2020 VMware, Inc. or its affiliates.
+%%
+%% @doc
+%% This module offers a framework to declare capabilities a RabbitMQ node
+%% supports and therefore a way to determine if multiple RabbitMQ nodes in
+%% a cluster are compatible and can work together.
+%%
+%% == What a feature flag is ==
+%%
+%% A <strong>feature flag</strong> is a name and several properties given
+%% to a change in RabbitMQ which impacts its communication with other
+%% RabbitMQ nodes. This kind of change can be:
+%% <ul>
+%% <li>an update to an Erlang record</li>
+%% <li>a modification to a replicated Mnesia table schema</li>
+%% <li>a modification to Erlang messages exchanged between Erlang processes
+%% which might run on remote nodes</li>
+%% </ul>
+%%
+%% A feature flag is qualified by:
+%% <ul>
+%% <li>a <strong>name</strong></li>
+%% <li>a <strong>description</strong> (optional)</li>
+%% <li>a list of other <strong>feature flags this feature flag depends on
+%% </strong> (optional). This can be useful when the change builds up on
+%% top of a previous change. For instance, it expands a record which was
+%% already modified by a previous feature flag.</li>
+%% <li>a <strong>migration function</strong> (optional). If provided, this
+%% function is called when the feature flag is enabled. It is responsible
+%% for doing all the data conversion, if any, and confirming the feature
+%% flag can be enabled.</li>
+%% <li>a level of stability (stable or experimental). For now, this is only
+%% informational. But it might be used for specific purposes in the
+%% future.</li>
+%% </ul>
+%%
+%% == How to declare a feature flag ==
+%%
+%% To define a new feature flag, you need to use the
+%% `rabbit_feature_flag()' module attribute:
+%%
+%% ```
+%% -rabbit_feature_flag(FeatureFlag).
+%% '''
+%%
+%% `FeatureFlag' is a {@type feature_flag_modattr()}.
+%%
+%% == How to enable a feature flag ==
+%%
+%% To enable a supported feature flag, you have the following solutions:
+%%
+%% <ul>
+%% <li>Using this module API:
+%% ```
+%% rabbit_feature_flags:enable(FeatureFlagName).
+%% '''
+%% </li>
+%% <li>Using the `rabbitmqctl' CLI:
+%% ```
+%% rabbitmqctl enable_feature_flag "$feature_flag_name"
+%% '''
+%% </li>
+%% </ul>
+%%
+%% == How to disable a feature flag ==
+%%
+%% Once enabled, there is <strong>currently no way to disable</strong> a
+%% feature flag.
+
+-module(rabbit_feature_flags).
+
+-export([list/0,
+ list/1,
+ list/2,
+ enable/1,
+ enable_all/0,
+ disable/1,
+ disable_all/0,
+ is_supported/1,
+ is_supported/2,
+ is_supported_locally/1,
+ is_supported_remotely/1,
+ is_supported_remotely/2,
+ is_supported_remotely/3,
+ is_enabled/1,
+ is_enabled/2,
+ is_disabled/1,
+ is_disabled/2,
+ info/0,
+ info/1,
+ init/0,
+ get_state/1,
+ get_stability/1,
+ check_node_compatibility/1,
+ check_node_compatibility/2,
+ is_node_compatible/1,
+ is_node_compatible/2,
+ sync_feature_flags_with_cluster/2,
+ sync_feature_flags_with_cluster/3,
+ refresh_feature_flags_after_app_load/1,
+ enabled_feature_flags_list_file/0
+ ]).
+
+%% RabbitMQ internal use only.
+-export([initialize_registry/0,
+ initialize_registry/1,
+ mark_as_enabled_locally/2,
+ remote_nodes/0,
+ running_remote_nodes/0,
+ does_node_support/3,
+ merge_feature_flags_from_unknown_apps/1,
+ do_sync_feature_flags_with_node/1]).
+
+-ifdef(TEST).
+-export([inject_test_feature_flags/1,
+ initialize_registry/3,
+ query_supported_feature_flags/0,
+ mark_as_enabled_remotely/2,
+ mark_as_enabled_remotely/4,
+ registry_loading_lock/0]).
+-endif.
+
+%% Default timeout for operations on remote nodes.
+-define(TIMEOUT, 60000).
+
+-define(FF_REGISTRY_LOADING_LOCK, {feature_flags_registry_loading, self()}).
+-define(FF_STATE_CHANGE_LOCK, {feature_flags_state_change, self()}).
+
+-type feature_flag_modattr() :: {feature_name(),
+ feature_props()}.
+%% The value of a `-rabbit_feature_flag()' module attribute used to
+%% declare a new feature flag.
+
+-type feature_name() :: atom().
+%% The feature flag's name. It is used in many places to identify a
+%% specific feature flag. In particular, this is how an end-user (or
+%% the CLI) can enable a feature flag. This is also the only bit which
+%% is persisted so a node remembers which feature flags are enabled.
+
+-type feature_props() :: #{desc => string(),
+ doc_url => string(),
+ stability => stability(),
+ depends_on => [feature_name()],
+ migration_fun => migration_fun_name()}.
+%% The feature flag properties.
+%%
+%% All properties are optional.
+%%
+%% The properties are:
+%% <ul>
+%% <li>`desc': a description of the feature flag</li>
+%% <li>`doc_url': a URL pointing to more documentation about the feature
+%% flag</li>
+%% <li>`stability': the level of stability</li>
+%% <li>`depends_on': a list of feature flags name which must be enabled
+%% before this one</li>
+%% <li>`migration_fun': a migration function specified by its module and
+%% function names</li>
+%% </ul>
+%%
+%% Note that the `migration_fun' is a {@type migration_fun_name()},
+%% not a {@type migration_fun()}. However, the function signature
+%% must conform to the {@type migration_fun()} signature. The reason
+%% is that we must be able to represent it as an Erlang term when
+%% we regenerate the registry module source code (using {@link
+%% erl_syntax:abstract/1}).
+
+-type feature_flags() :: #{feature_name() => feature_props_extended()}.
+%% The feature flags map as returned or accepted by several functions in
+%% this module. In particular, this what the {@link list/0} function
+%% returns.
+
+-type feature_props_extended() :: #{desc => string(),
+ doc_url => string(),
+ stability => stability(),
+ migration_fun => migration_fun_name(),
+ depends_on => [feature_name()],
+ provided_by => atom()}.
+%% The feature flag properties, once expanded by this module when feature
+%% flags are discovered.
+%%
+%% The new properties compared to {@type feature_props()} are:
+%% <ul>
+%% <li>`provided_by': the name of the application providing the feature flag</li>
+%% </ul>
+
+-type feature_state() :: boolean() | state_changing.
+%% The state of the feature flag: enabled if `true', disabled if `false'
+%% or `state_changing'.
+
+-type feature_states() :: #{feature_name() => feature_state()}.
+
+-type stability() :: stable | experimental.
+%% The level of stability of a feature flag. Currently, only informational.
+
+-type migration_fun_name() :: {Module :: atom(), Function :: atom()}.
+%% The name of the module and function to call when changing the state of
+%% the feature flag.
+
+-type migration_fun() :: fun((feature_name(),
+ feature_props_extended(),
+ migration_fun_context())
+ -> ok | {error, any()} | % context = enable
+ boolean() | undefined). % context = is_enabled
+%% The migration function signature.
+%%
+%% It is called with context `enable' when a feature flag is being enabled.
+%% The function is responsible for this feature-flag-specific verification
+%% and data conversion. It returns `ok' if RabbitMQ can mark the feature
+%% flag as enabled and continue with the next one, if any. Otherwise, it
+%% returns `{error, any()}' if there is an error and the feature flag should
+%% remain disabled. The function must be idempotent: if the feature flag is
+%% already enabled on another node and the local node is running this function
+%% again because it is syncing its feature flags state, it should succeed.
+%%
+%% It is called with the context `is_enabled' to check if a feature flag
+%% is actually enabled. It is useful on RabbitMQ startup, just in case
+%% the previous instance failed to write the feature flags list file.
+
+-type migration_fun_context() :: enable | is_enabled.
+
+-type registry_vsn() :: term().
+
+-export_type([feature_flag_modattr/0,
+ feature_props/0,
+ feature_name/0,
+ feature_flags/0,
+ feature_props_extended/0,
+ feature_state/0,
+ feature_states/0,
+ stability/0,
+ migration_fun_name/0,
+ migration_fun/0,
+ migration_fun_context/0]).
+
+-on_load(on_load/0).
+
-spec list() -> feature_flags().
%% @doc
%% Lists all supported feature flags.
%%
%% Equivalent to {@link list/1} called with `all'.
%%
%% @returns A map of all supported feature flags.

list() -> list(all).
+
+-spec list(Which :: all | enabled | disabled) -> feature_flags().
+%% @doc
+%% Lists all, enabled or disabled feature flags, depending on the argument.
+%%
+%% @param Which The group of feature flags to return: `all', `enabled' or
+%% `disabled'.
+%% @returns A map of selected feature flags.
+
list(all)     -> rabbit_ff_registry:list(all);
list(enabled) -> rabbit_ff_registry:list(enabled);
list(disabled) ->
    %% The registry has no `disabled' view: take every known flag and
    %% keep those which are currently not enabled.
    All = list(all),
    maps:filter(fun(FeatureName, _Props) -> is_disabled(FeatureName) end, All).
+
+-spec list(all | enabled | disabled, stability()) -> feature_flags().
+%% @doc
+%% Lists all, enabled or disabled feature flags, depending on the first
+%% argument, only keeping those having the specified stability.
+%%
+%% @param Which The group of feature flags to return: `all', `enabled' or
+%% `disabled'.
+%% @param Stability The level of stability used to filter the map of feature
+%% flags.
+%% @returns A map of selected feature flags.
+
list(Which, Stability)
  when Stability =:= stable orelse Stability =:= experimental ->
    %% Keep only the feature flags whose declared stability matches.
    Keep = fun(_FeatureName, FeatureProps) ->
                   get_stability(FeatureProps) =:= Stability
           end,
    maps:filter(Keep, list(Which)).
+
-spec enable(feature_name() | [feature_name()]) -> ok |
                                                   {error, Reason :: any()}.
%% @doc
%% Enables the specified feature flag or set of feature flags.
%%
%% @param FeatureName The name or the list of names of feature flags to
%%   enable.
%% @returns `ok' if the feature flags (and all the feature flags they
%%   depend on) were successfully enabled, or `{error, Reason}' if one
%%   feature flag could not be enabled (subsequent feature flags in the
%%   dependency tree are left unchanged).

enable(FeatureName) when is_atom(FeatureName) ->
    rabbit_log_feature_flags:debug(
      "Feature flag `~s`: REQUEST TO ENABLE",
      [FeatureName]),
    case is_enabled(FeatureName) of
        true ->
            %% Enabling is idempotent: nothing to do when the flag is
            %% already enabled.
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: already enabled",
              [FeatureName]),
            ok;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: not enabled, check if supported by cluster",
              [FeatureName]),
            %% The feature flag must be supported locally and remotely
            %% (i.e. by all members of the cluster).
            case is_supported(FeatureName) of
                true ->
                    rabbit_log_feature_flags:info(
                      "Feature flag `~s`: supported, attempt to enable...",
                      [FeatureName]),
                    do_enable(FeatureName);
                false ->
                    rabbit_log_feature_flags:error(
                      "Feature flag `~s`: not supported",
                      [FeatureName]),
                    {error, unsupported}
            end
    end;
enable(FeatureNames) when is_list(FeatureNames) ->
    %% Flags are enabled one at a time; the first failure aborts the rest.
    with_feature_flags(FeatureNames, fun enable/1).
+
+-spec enable_all() -> ok | {error, any()}.
+%% @doc
+%% Enables all supported feature flags.
+%%
+%% @returns `ok' if the feature flags were successfully enabled,
+%% or `{error, Reason}' if one feature flag could not be enabled
+%% (subsequent feature flags in the dependency tree are left
+%% unchanged).
+
enable_all() ->
    %% Every known feature flag is attempted in turn; the first failure
    %% aborts the remaining ones (see with_feature_flags/2).
    FeatureNames = maps:keys(list(all)),
    with_feature_flags(FeatureNames, fun enable/1).
+
+-spec disable(feature_name() | [feature_name()]) -> ok | {error, any()}.
+%% @doc
+%% Disables the specified feature flag or set of feature flags.
+%%
+%% @param FeatureName The name or the list of names of feature flags to
+%% disable.
+%% @returns currently always `{error, unsupported}' for any non-empty
+%% input: once enabled, a feature flag cannot be disabled (see the
+%% module documentation). `ok' is only returned for an empty list of
+%% feature flag names.
+
%% Disabling a feature flag is not implemented: the single-flag clause
%% always returns `{error, unsupported}'. The parameter is
%% underscore-prefixed because it is only needed for the `is_atom/1'
%% guard — this silences the "unused variable" compiler warning the
%% original clause produced.
disable(_FeatureName) when is_atom(_FeatureName) ->
    {error, unsupported};
disable(FeatureNames) when is_list(FeatureNames) ->
    %% Each flag is handed to the clause above, so any non-empty list
    %% also results in `{error, unsupported}'.
    with_feature_flags(FeatureNames, fun disable/1).
+
+-spec disable_all() -> ok | {error, any()}.
+%% @doc
+%% Disables all supported feature flags.
+%%
+%% @returns `ok' if the feature flags were successfully disabled,
+%% or `{error, Reason}' if one feature flag could not be disabled
+%% (subsequent feature flags in the dependency tree are left
+%% unchanged).
+
disable_all() ->
    %% Mirrors enable_all/0. As disable/1 currently reports
    %% `{error, unsupported}' for every flag, this only returns `ok'
    %% when there are no feature flags at all.
    FeatureNames = maps:keys(list(all)),
    with_feature_flags(FeatureNames, fun disable/1).
+
+-spec with_feature_flags([feature_name()],
+ fun((feature_name()) -> ok | {error, any()})) ->
+ ok | {error, any()}.
+%% @private
+
%% Applies `Fun' to each feature flag name in order, stopping at the
%% first result that is not `ok' and returning it; returns `ok' when
%% the whole list succeeded (or was empty).
with_feature_flags([], _Fun) ->
    ok;
with_feature_flags([FeatureName | Remaining], Fun) ->
    case Fun(FeatureName) of
        ok    -> with_feature_flags(Remaining, Fun);
        Error -> Error
    end.
+
-spec is_supported(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by the entire cluster.
%%
%% This is the same as calling both {@link is_supported_locally/1} and
%% {@link is_supported_remotely/1} with a logical AND.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported(FeatureNames) ->
    %% `andalso' short-circuits: the remote RPC only happens when the
    %% flags are supported locally.
    is_supported_locally(FeatureNames) andalso
    is_supported_remotely(FeatureNames).
+
-spec is_supported(feature_name() | [feature_name()], timeout()) ->
    boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by the entire cluster.
%%
%% This is the same as calling both {@link is_supported_locally/1} and
%% {@link is_supported_remotely/2} with a logical AND.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported(FeatureNames, Timeout) ->
    %% `andalso' short-circuits: the remote RPC only happens when the
    %% flags are supported locally.
    is_supported_locally(FeatureNames) andalso
    is_supported_remotely(FeatureNames, Timeout).
+
+-spec is_supported_locally(feature_name() | [feature_name()]) -> boolean().
+%% @doc
+%% Returns if a single feature flag or a set of feature flags is
+%% supported by the local node.
+%%
+%% @param FeatureNames The name or a list of names of the feature flag(s)
+%% to be checked.
+%% @returns `true' if the set of feature flags is entirely supported, or
+%% `false' if one of them is not.
+
is_supported_locally(FeatureName) when is_atom(FeatureName) ->
    rabbit_ff_registry:is_supported(FeatureName);
is_supported_locally(FeatureNames) when is_list(FeatureNames) ->
    %% Each name is delegated to the single-flag clause above;
    %% lists:all/2 stops at the first unsupported flag.
    lists:all(fun is_supported_locally/1, FeatureNames).
+
-spec is_supported_remotely(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by all remote nodes.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported_remotely(FeatureNames) ->
    %% Uses the module-wide default RPC timeout (?TIMEOUT).
    is_supported_remotely(FeatureNames, ?TIMEOUT).
+
-spec is_supported_remotely(feature_name() | [feature_name()], timeout()) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by all remote nodes.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported_remotely(FeatureName, Timeout) when is_atom(FeatureName) ->
    %% Normalize the single-flag form to a list.
    is_supported_remotely([FeatureName], Timeout);
is_supported_remotely([], _) ->
    %% An empty list is trivially supported; no RPC is needed.
    rabbit_log_feature_flags:debug(
      "Feature flags: skipping query for feature flags support as the "
      "given list is empty"),
    true;
is_supported_remotely(FeatureNames, Timeout) when is_list(FeatureNames) ->
    case running_remote_nodes() of
        [] ->
            %% A node with no running cluster peers has nobody to ask.
            rabbit_log_feature_flags:debug(
              "Feature flags: isolated node; skipping remote node query "
              "=> consider `~p` supported",
              [FeatureNames]),
            true;
        RemoteNodes ->
            rabbit_log_feature_flags:debug(
              "Feature flags: about to query these remote nodes about "
              "support for `~p`: ~p",
              [FeatureNames, RemoteNodes]),
            is_supported_remotely(RemoteNodes, FeatureNames, Timeout)
    end.
+
-spec is_supported_remotely([node()],
                            feature_name() | [feature_name()],
                            timeout()) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by specified remote nodes.
%%
%% @param RemoteNodes The list of remote nodes to query.
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported by
%%   all nodes, or `false' if one of them is not or the RPC timed out.

is_supported_remotely(_, [], _) ->
    %% No feature flags to check: trivially supported.
    rabbit_log_feature_flags:debug(
      "Feature flags: skipping query for feature flags support as the "
      "given list is empty"),
    true;
is_supported_remotely([Node | Rest], FeatureNames, Timeout) ->
    %% Nodes are queried one at a time; the first node which does not
    %% support the flags stops the recursion.
    case does_node_support(Node, FeatureNames, Timeout) of
        true ->
            is_supported_remotely(Rest, FeatureNames, Timeout);
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: stopping query for support for `~p` here",
              [FeatureNames]),
            false
    end;
is_supported_remotely([], FeatureNames, _) ->
    %% Reached only after every node answered positively.
    rabbit_log_feature_flags:debug(
      "Feature flags: all running remote nodes support `~p`",
      [FeatureNames]),
    true.
+
-spec is_enabled(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% enabled.
%%
%% This is the same as calling {@link is_enabled/2} as a `blocking'
%% call.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is enabled, or
%%   `false' if one of them is not.

is_enabled(FeatureNames) ->
    is_enabled(FeatureNames, blocking).
+
-spec is_enabled
(feature_name() | [feature_name()], blocking) ->
    boolean();
(feature_name() | [feature_name()], non_blocking) ->
    feature_state().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% enabled.
%%
%% When `blocking' is passed, the function waits (blocks) for the
%% state of a feature flag being disabled or enabled stabilizes before
%% returning its final state.
%%
%% When `non_blocking' is passed, the function returns immediately with
%% the state of the feature flag (`true' if enabled, `false' otherwise)
%% or `state_changing' if the state is being changed at the time of the
%% call.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is enabled,
%%   `false' if one of them is not, or `state_changing' if one of them
%%   is being worked on. Note that `state_changing' has precedence over
%%   `false', so if one is `false' and another one is `state_changing',
%%   `state_changing' is returned.

is_enabled(FeatureNames, non_blocking) ->
    is_enabled_nb(FeatureNames);
is_enabled(FeatureNames, blocking) ->
    case is_enabled_nb(FeatureNames) of
        state_changing ->
            %% Wait for the in-progress state change to finish:
            %% acquiring the state-change lock blocks until its current
            %% holder releases it. It is then released immediately and
            %% the state is re-checked.
            global:set_lock(?FF_STATE_CHANGE_LOCK),
            global:del_lock(?FF_STATE_CHANGE_LOCK),
            is_enabled(FeatureNames, blocking);
        IsEnabled ->
            IsEnabled
    end.
+
%% Non-blocking check of the enabled state of one or several feature
%% flags; may return `state_changing'.
is_enabled_nb(FeatureName) when is_atom(FeatureName) ->
    rabbit_ff_registry:is_enabled(FeatureName);
is_enabled_nb(FeatureNames) when is_list(FeatureNames) ->
    %% Fold over all flags, starting from `true':
    %%   - `state_changing' is sticky: once seen, it is the final result;
    %%   - a `false' accumulator is kept, unless a later flag turns out
    %%     to be `state_changing' (which takes precedence);
    %%   - otherwise the accumulator follows the current flag's state.
    lists:foldl(
      fun
          (_F, state_changing = Acc) ->
              Acc;
          (F, false = Acc) ->
              case rabbit_ff_registry:is_enabled(F) of
                  state_changing -> state_changing;
                  _              -> Acc
              end;
          (F, _) ->
              rabbit_ff_registry:is_enabled(F)
      end,
      true, FeatureNames).
+
-spec is_disabled(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or one feature flag in a set of
%% feature flags is disabled.
%%
%% This is the same as negating the result of {@link is_enabled/1}.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if one of the feature flags is disabled, or
%%   `false' if they are all enabled.

is_disabled(FeatureNames) ->
    is_disabled(FeatureNames, blocking).
+
+-spec is_disabled
+(feature_name() | [feature_name()], blocking) ->
+ boolean();
+(feature_name() | [feature_name()], non_blocking) ->
+ feature_state().
+%% @doc
+%% Returns if a single feature flag or one feature flag in a set of
+%% feature flags is disabled.
+%%
+%% This is the same as negating the result of {@link is_enabled/2},
+%% except that `state_changing' is returned as is.
+%%
+%% See {@link is_enabled/2} for a description of the `blocking' and
+%% `non_blocking' modes.
+%%
+%% @param FeatureNames The name or a list of names of the feature flag(s)
+%% to be checked.
+%% @returns `true' if one feature flag in the set of feature flags is
+%% disabled, `false' if they are all enabled, or `state_changing' if
+%% one of them is being worked on. Note that `state_changing' has
+%% precedence over `true', so if one is `true' (i.e. disabled) and
+%% another one is `state_changing', `state_changing' is returned.
+%%
+%% @see is_enabled/2
+
is_disabled(FeatureName, Blocking) ->
    %% `state_changing' is passed through untouched; a boolean state is
    %% simply negated.
    State = is_enabled(FeatureName, Blocking),
    case State of
        state_changing -> state_changing;
        Enabled        -> not Enabled
    end.
+
-spec info() -> ok.
%% @doc
%% Displays a table on stdout summing up the supported feature flags,
%% their state and various information about them.

info() ->
    info(#{}).
+
-spec info(#{color => boolean(),
             lines => boolean(),
             verbose => non_neg_integer()}) -> ok.
%% @doc
%% Displays a table on stdout summing up the supported feature flags,
%% their state and various information about them.
%%
%% Supported options are:
%% <ul>
%% <li>`color': a boolean to indicate if colors should be used to
%%   highlight some elements.</li>
%% <li>`lines': a boolean to indicate if table borders should be drawn
%%   using ASCII lines instead of regular characters.</li>
%% <li>`verbose': a non-negative integer to specify the level of
%%   verbosity.</li>
%% </ul>
%%
%% @param Options A map of various options to tune the displayed table.

info(Options) when is_map(Options) ->
    %% The actual rendering is implemented in rabbit_ff_extra.
    rabbit_ff_extra:info(Options).
+
+-spec get_state(feature_name()) -> enabled | disabled | unavailable.
+%% @doc
+%% Returns the state of a feature flag.
+%%
+%% The possible states are:
+%% <ul>
+%% <li>`enabled': the feature flag is enabled.</li>
+%% <li>`disabled': the feature flag is supported by all nodes in the
+%% cluster but currently disabled.</li>
+%% <li>`unavailable': the feature flag is unsupported by at least one
+%% node in the cluster and can not be enabled for now.</li>
+%% </ul>
+%%
+%% @param FeatureName The name of the feature flag to check.
+%% @returns `enabled', `disabled' or `unavailable'.
+
get_state(FeatureName) when is_atom(FeatureName) ->
    %% Both checks are evaluated unconditionally (left to right), as in
    %% the original sequential form.
    case {is_enabled(FeatureName), is_supported(FeatureName)} of
        {true, _}      -> enabled;
        {false, true}  -> disabled;
        {false, false} -> unavailable
    end.
+
-spec get_stability(feature_name() | feature_props_extended()) ->
    stability() | undefined.
%% @doc
%% Returns the stability of a feature flag.
%%
%% The possible stability levels are:
%% <ul>
%% <li>`stable': the feature flag is stable and will not change in future
%%   releases: it can be enabled in production.</li>
%% <li>`experimental': the feature flag is experimental and may change in
%%   the future (without a guaranteed upgrade path): enabling it in
%%   production is not recommended.</li>
%% </ul>
%%
%% @param FeatureName The name of the feature flag to check.
%% @returns `stable' or `experimental', or `undefined' if the given
%%   feature flag name is unknown to the registry.
%%
%% Note: the spec now includes `undefined' — the atom clause below
%% returns it when the registry does not know the flag; the previous
%% spec (`stability()' only) contradicted the code. The spurious
%% `unavailable' bullet (copy-pasted from get_state/1) was removed.

get_stability(FeatureName) when is_atom(FeatureName) ->
    case rabbit_ff_registry:get(FeatureName) of
        undefined    -> undefined;
        FeatureProps -> get_stability(FeatureProps)
    end;
get_stability(FeatureProps) when is_map(FeatureProps) ->
    %% A feature flag is considered stable unless declared otherwise.
    maps:get(stability, FeatureProps, stable).
+
+%% -------------------------------------------------------------------
+%% Feature flags registry.
+%% -------------------------------------------------------------------
+
-spec init() -> ok | no_return().
%% @private
%% Boot-time setup of the feature flags subsystem.

init() ->
    %% We want to make sure the `feature_flags` file exists once
    %% RabbitMQ was started at least once. This is not required by
    %% this module (it works fine if the file is missing) but it helps
    %% external tools.
    _ = ensure_enabled_feature_flags_list_file_exists(),

    %% We also "list" supported feature flags. We are not interested in
    %% that list, however, it triggers the first initialization of the
    %% registry.
    _ = list(all),
    ok.
+
-spec initialize_registry() -> ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% The registry is an Erlang module recompiled at runtime to hold the
%% state of all supported feature flags.
%%
%% That Erlang module is called {@link rabbit_ff_registry}. The initial
%% source code of this module simply calls this function so it is
%% replaced by a proper registry.
%%
%% Once replaced, the registry contains the map of all supported feature
%% flags and their state. This makes it very efficient to query a
%% feature flag state or property.
%%
%% The registry is local to all RabbitMQ nodes.

initialize_registry() ->
    initialize_registry(#{}).
+
-spec initialize_registry(feature_flags()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% See {@link initialize_registry/0} for a description of the registry.
%%
%% This function takes a map of new supported feature flags (so their
%% name and extended properties) to add to the existing known feature
%% flags.

initialize_registry(NewSupportedFeatureFlags) ->
    %% The first step is to get the feature flag states: if this is the
    %% first time we initialize it, we read the list from disk (the
    %% `feature_flags` file). Otherwise we query the existing registry
    %% before it is replaced.
    RegistryInitialized = rabbit_ff_registry:is_registry_initialized(),
    FeatureStates = case RegistryInitialized of
                        true ->
                            rabbit_ff_registry:states();
                        false ->
                            EnabledFeatureNames =
                                read_enabled_feature_flags_list(),
                            list_of_enabled_feature_flags_to_feature_states(
                              EnabledFeatureNames)
                    end,

    %% We also record if the feature flags state was correctly written
    %% to disk. Currently we don't use this information, but in the
    %% future, we might want to retry the write if it failed so far.
    %%
    %% TODO: Retry to write the feature flags state if the first try
    %% failed.
    WrittenToDisk = case RegistryInitialized of
                        true ->
                            rabbit_ff_registry:is_registry_written_to_disk();
                        false ->
                            true
                    end,
    initialize_registry(NewSupportedFeatureFlags,
                        FeatureStates,
                        WrittenToDisk).
+
+-spec list_of_enabled_feature_flags_to_feature_states([feature_name()]) ->
+ feature_states().
+
%% Builds a feature-states map in which every given feature flag name is
%% marked as enabled (`true').
list_of_enabled_feature_flags_to_feature_states(FeatureNames) ->
    lists:foldl(fun(FeatureName, States) -> States#{FeatureName => true} end,
                #{}, FeatureNames).
+
-spec initialize_registry(feature_flags(),
                          feature_states(),
                          boolean()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% See {@link initialize_registry/0} for a description of the registry.
%%
%% This function takes a map of new supported feature flags (so their
%% name and extended properties) to add to the existing known feature
%% flags, a map of the new feature flag states (whether they are
%% enabled, disabled or `state_changing'), and a flag to indicate if the
%% feature flag states was recorded to disk.
%%
%% The latter is used to block callers asking if a feature flag is
%% enabled or disabled while its state is changing.

initialize_registry(NewSupportedFeatureFlags,
                    NewFeatureStates,
                    WrittenToDisk) ->
    Ret = maybe_initialize_registry(NewSupportedFeatureFlags,
                                    NewFeatureStates,
                                    WrittenToDisk),
    case Ret of
        ok      -> ok;
        %% `restart' means the registry was reloaded concurrently while
        %% the new content was being computed; retry with the same
        %% arguments.
        restart -> initialize_registry(NewSupportedFeatureFlags,
                                       NewFeatureStates,
                                       WrittenToDisk);
        Error   -> Error
    end.
+
-spec maybe_initialize_registry(feature_flags(),
                                feature_states(),
                                boolean()) ->
    ok | restart | {error, any()} | no_return().
%% @private
%% Computes the new registry content and regenerates the registry module
%% only if something actually changed.

maybe_initialize_registry(NewSupportedFeatureFlags,
                          NewFeatureStates,
                          WrittenToDisk) ->
    %% We save the version of the current registry before computing
    %% the new one. This is used when we do the actual reload: if the
    %% current registry was reloaded in the meantime, we need to restart
    %% the computation to make sure we don't lose data.
    RegistryVsn = registry_vsn(),

    %% We take the feature flags already registered.
    RegistryInitialized = rabbit_ff_registry:is_registry_initialized(),
    KnownFeatureFlags1 = case RegistryInitialized of
                             true  -> rabbit_ff_registry:list(all);
                             false -> #{}
                         end,

    %% Query the list (it's a map to be exact) of known
    %% supported feature flags. That list comes from the
    %% `-rabbit_feature_flag().` module attributes exposed by all
    %% currently loaded Erlang modules.
    KnownFeatureFlags2 = query_supported_feature_flags(),

    %% We merge the feature flags we already knew about
    %% (KnownFeatureFlags1), those found in the loaded applications
    %% (KnownFeatureFlags2) and those specified in arguments
    %% (NewSupportedFeatureFlags). The latter come from remote nodes
    %% usually: for example, they can come from plugins loaded on remote
    %% node but the plugins are missing locally. In this case, we
    %% consider those feature flags supported because there is no code
    %% locally which would cause issues.
    %%
    %% It means that the list of feature flags only grows. We don't try
    %% to clean it at some point because we want to remember about the
    %% feature flags we saw (and their state). It should be fine because
    %% that list should remain small.
    KnownFeatureFlags = maps:merge(KnownFeatureFlags1,
                                   KnownFeatureFlags2),
    AllFeatureFlags = maps:merge(KnownFeatureFlags,
                                 NewSupportedFeatureFlags),

    %% Next we want to update the feature states, based on the new
    %% states passed as arguments.
    FeatureStates0 = case RegistryInitialized of
                         true ->
                             maps:merge(rabbit_ff_registry:states(),
                                        NewFeatureStates);
                         false ->
                             NewFeatureStates
                     end,
    %% Only enabled (`true') and in-transition (`state_changing')
    %% entries are kept; disabled flags are simply absent from the map.
    FeatureStates = maps:filter(
                      fun(_, true)           -> true;
                         (_, state_changing) -> true;
                         (_, false)          -> false
                      end, FeatureStates0),

    Proceed = does_registry_need_refresh(AllFeatureFlags,
                                         FeatureStates,
                                         WrittenToDisk),

    case Proceed of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: (re)initialize registry (~p)",
              [self()]),
            T0 = erlang:timestamp(),
            Ret = do_initialize_registry(RegistryVsn,
                                         AllFeatureFlags,
                                         FeatureStates,
                                         WrittenToDisk),
            T1 = erlang:timestamp(),
            rabbit_log_feature_flags:debug(
              "Feature flags: time to regen registry: ~p µs",
              [timer:now_diff(T1, T0)]),
            Ret;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: registry already up-to-date, skipping init"),
            ok
    end.
+
-spec does_registry_need_refresh(feature_flags(),
                                 feature_states(),
                                 boolean()) ->
    boolean().
%% @private
%% Compares the proposed registry content (flags, states, written-to-disk
%% marker) with what the current registry holds; returns `true' if a
%% regeneration is warranted.

does_registry_need_refresh(AllFeatureFlags,
                           FeatureStates,
                           WrittenToDisk) ->
    case rabbit_ff_registry:is_registry_initialized() of
        true ->
            %% Before proceeding with the actual
            %% (re)initialization, let's see if there are any
            %% changes.
            CurrentAllFeatureFlags = rabbit_ff_registry:list(all),
            CurrentFeatureStates = rabbit_ff_registry:states(),
            CurrentWrittenToDisk =
                rabbit_ff_registry:is_registry_written_to_disk(),

            if
                AllFeatureFlags =/= CurrentAllFeatureFlags ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, list of feature flags differs"),
                    true;
                FeatureStates =/= CurrentFeatureStates ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, feature flag states differ"),
                    true;
                WrittenToDisk =/= CurrentWrittenToDisk ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, \"written to disk\" state changed"),
                    true;
                true ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: no"),
                    false
            end;
        false ->
            %% A registry which was never generated always needs one.
            rabbit_log_feature_flags:debug(
              "Feature flags: registry refresh needed: "
              "yes, first-time initialization",
            true
    end.
+
+-spec do_initialize_registry(registry_vsn(),
+                             feature_flags(),
+                             feature_states(),
+                             boolean()) ->
+    ok | restart | {error, any()} | no_return().
+%% @private
+%% Logs a human-readable checklist of all known feature flags and their
+%% states, then asks for the registry module to be regenerated with the
+%% given state.
+
+do_initialize_registry(RegistryVsn,
+                       AllFeatureFlags,
+                       FeatureStates,
+                       WrittenToDisk) ->
+    %% We log the state of those feature flags.
+    rabbit_log_feature_flags:info(
+      "Feature flags: list of feature flags found:"),
+    lists:foreach(
+      fun(FeatureName) ->
+              %% Checkbox per flag: "x" = enabled, "~" = state
+              %% changing, " " = disabled (absent from FeatureStates).
+              %% There is no clause for a `false' value on purpose:
+              %% initialize_registry/3 filters `false' states out
+              %% before calling us, so a present key is only ever
+              %% `true' or `state_changing'.
+              rabbit_log_feature_flags:info(
+                "Feature flags: [~s] ~s",
+                [case maps:is_key(FeatureName, FeatureStates) of
+                     true ->
+                         case maps:get(FeatureName, FeatureStates) of
+                             true           -> "x";
+                             state_changing -> "~"
+                         end;
+                     false ->
+                         " "
+                 end,
+                 FeatureName])
+      end, lists:sort(maps:keys(AllFeatureFlags))),
+    rabbit_log_feature_flags:info(
+      "Feature flags: feature flag states written to disk: ~s",
+      [case WrittenToDisk of
+           true  -> "yes";
+           false -> "no"
+       end]),
+
+    %% We request the registry to be regenerated and reloaded with the
+    %% new state.
+    regen_registry_mod(RegistryVsn,
+                       AllFeatureFlags,
+                       FeatureStates,
+                       WrittenToDisk).
+
+-spec query_supported_feature_flags() -> feature_flags().
+%% @private
+%% Collects all `rabbit_feature_flag' module attributes from RabbitMQ
+%% related applications. In TEST builds, attributes injected by a
+%% testsuite (stored in a persistent_term) are appended as well.
+
+-ifdef(TEST).
+%% Key under which the testsuite-provided attributes are stored.
+-define(PT_TESTSUITE_ATTRS, {?MODULE, testsuite_feature_flags_attrs}).
+
+%% Stores testsuite-provided feature flag attributes and immediately
+%% rebuilds the registry so they take effect.
+inject_test_feature_flags(AttributesFromTestsuite) ->
+    rabbit_log_feature_flags:debug(
+      "Feature flags: injecting feature flags from testsuite: ~p",
+      [AttributesFromTestsuite]),
+    ok = persistent_term:put(?PT_TESTSUITE_ATTRS, AttributesFromTestsuite),
+    initialize_registry().
+
+%% Returns the injected attributes, or [] when none were injected.
+module_attributes_from_testsuite() ->
+    persistent_term:get(?PT_TESTSUITE_ATTRS, []).
+
+query_supported_feature_flags() ->
+    rabbit_log_feature_flags:debug(
+      "Feature flags: query feature flags in loaded applications "
+      "+ testsuite"),
+    %% NOTE(review): wall-clock timestamps are used to time the scan;
+    %% erlang:monotonic_time/1 would be more robust for durations.
+    T0 = erlang:timestamp(),
+    AttributesPerApp = rabbit_misc:rabbitmq_related_module_attributes(
+                         rabbit_feature_flag),
+    AttributesFromTestsuite = module_attributes_from_testsuite(),
+    T1 = erlang:timestamp(),
+    rabbit_log_feature_flags:debug(
+      "Feature flags: time to find supported feature flags: ~p µs",
+      [timer:now_diff(T1, T0)]),
+    AllAttributes = AttributesPerApp ++ AttributesFromTestsuite,
+    prepare_queried_feature_flags(AllAttributes, #{}).
+-else.
+query_supported_feature_flags() ->
+    rabbit_log_feature_flags:debug(
+      "Feature flags: query feature flags in loaded applications"),
+    T0 = erlang:timestamp(),
+    AttributesPerApp = rabbit_misc:rabbitmq_related_module_attributes(
+                         rabbit_feature_flag),
+    T1 = erlang:timestamp(),
+    rabbit_log_feature_flags:debug(
+      "Feature flags: time to find supported feature flags: ~p µs",
+      [timer:now_diff(T1, T0)]),
+    prepare_queried_feature_flags(AttributesPerApp, #{}).
+-endif.
+
+%% Folds the `rabbit_feature_flag' attributes gathered per application
+%% into one feature flags map, tagging each flag with its provider.
+prepare_queried_feature_flags(AttributesPerApp, AllFeatureFlags0) ->
+    lists:foldl(
+      fun({App, _Module, Attributes}, AllFeatureFlags) ->
+              rabbit_log_feature_flags:debug(
+                "Feature flags: application `~s` has ~b feature flags",
+                [App, length(Attributes)]),
+              lists:foldl(
+                fun({FeatureName, FeatureProps}, AllFF) ->
+                        merge_new_feature_flags(AllFF,
+                                                App,
+                                                FeatureName,
+                                                FeatureProps)
+                end, AllFeatureFlags, Attributes)
+      end, AllFeatureFlags0, AttributesPerApp).
+
+-spec merge_new_feature_flags(feature_flags(),
+                              atom(),
+                              feature_name(),
+                              feature_props()) -> feature_flags().
+%% @private
+
+%% Records one feature flag in the accumulated map. The properties are
+%% extended with `provided_by', the application which declared the
+%% flag: purely informational, but handy to trace a flag back to the
+%% plugin providing it.
+merge_new_feature_flags(AllFeatureFlags, App, FeatureName, FeatureProps)
+  when is_atom(FeatureName) andalso is_map(FeatureProps) ->
+    AllFeatureFlags#{FeatureName => FeatureProps#{provided_by => App}}.
+
+-spec regen_registry_mod(registry_vsn(),
+                         feature_flags(),
+                         feature_states(),
+                         boolean()) ->
+    ok | restart | {error, any()} | no_return().
+%% @private
+%% Regenerates, compiles and loads the `rabbit_ff_registry' module so
+%% that it embeds the given feature flags, their states and the
+%% "written to disk" flag as constant function clauses.
+
+regen_registry_mod(RegistryVsn,
+                   AllFeatureFlags,
+                   FeatureStates,
+                   WrittenToDisk) ->
+    %% Here, we recreate the source code of the `rabbit_ff_registry`
+    %% module from scratch.
+    %%
+    %% IMPORTANT: We want both modules to have the exact same public
+    %% API in order to simplify the life of developers and their tools
+    %% (Dialyzer, completion, and so on).
+
+    %% -module(rabbit_ff_registry).
+    ModuleAttr = erl_syntax:attribute(
+                   erl_syntax:atom(module),
+                   [erl_syntax:atom(rabbit_ff_registry)]),
+    ModuleForm = erl_syntax:revert(ModuleAttr),
+    %% -export([...]).
+    ExportAttr = erl_syntax:attribute(
+                   erl_syntax:atom(export),
+                   [erl_syntax:list(
+                      [erl_syntax:arity_qualifier(
+                         erl_syntax:atom(F),
+                         erl_syntax:integer(A))
+                       || {F, A} <- [{get, 1},
+                                     {list, 1},
+                                     {states, 0},
+                                     {is_supported, 1},
+                                     {is_enabled, 1},
+                                     {is_registry_initialized, 0},
+                                     {is_registry_written_to_disk, 0}]]
+                     )
+                   ]
+                  ),
+    ExportForm = erl_syntax:revert(ExportAttr),
+    %% get(_) -> ...
+    %% One clause per known flag, returning its properties map as a
+    %% literal; any other name returns `undefined'.
+    GetClauses = [erl_syntax:clause(
+                    [erl_syntax:atom(FeatureName)],
+                    [],
+                    [erl_syntax:abstract(maps:get(FeatureName,
+                                                  AllFeatureFlags))])
+                  || FeatureName <- maps:keys(AllFeatureFlags)
+                 ],
+    GetUnknownClause = erl_syntax:clause(
+                         [erl_syntax:variable("_")],
+                         [],
+                         [erl_syntax:atom(undefined)]),
+    GetFun = erl_syntax:function(
+               erl_syntax:atom(get),
+               GetClauses ++ [GetUnknownClause]),
+    GetFunForm = erl_syntax:revert(GetFun),
+    %% list(_) -> ...
+    ListAllBody = erl_syntax:abstract(AllFeatureFlags),
+    ListAllClause = erl_syntax:clause([erl_syntax:atom(all)],
+                                      [],
+                                      [ListAllBody]),
+    %% list(enabled): flags whose state is exactly `true'.
+    EnabledFeatureFlags = maps:filter(
+                            fun(FeatureName, _) ->
+                                    maps:is_key(FeatureName,
+                                                FeatureStates)
+                                    andalso
+                                    maps:get(FeatureName, FeatureStates)
+                                    =:=
+                                    true
+                            end, AllFeatureFlags),
+    ListEnabledBody = erl_syntax:abstract(EnabledFeatureFlags),
+    ListEnabledClause = erl_syntax:clause(
+                          [erl_syntax:atom(enabled)],
+                          [],
+                          [ListEnabledBody]),
+    %% list(disabled): flags with no recorded state at all. Note that
+    %% `state_changing' flags are neither enabled nor disabled here.
+    DisabledFeatureFlags = maps:filter(
+                             fun(FeatureName, _) ->
+                                     not maps:is_key(FeatureName,
+                                                     FeatureStates)
+                             end, AllFeatureFlags),
+    ListDisabledBody = erl_syntax:abstract(DisabledFeatureFlags),
+    ListDisabledClause = erl_syntax:clause(
+                           [erl_syntax:atom(disabled)],
+                           [],
+                           [ListDisabledBody]),
+    %% list(state_changing): flags whose state is `state_changing'.
+    StateChangingFeatureFlags = maps:filter(
+                                  fun(FeatureName, _) ->
+                                          maps:is_key(FeatureName,
+                                                      FeatureStates)
+                                          andalso
+                                          maps:get(FeatureName, FeatureStates)
+                                          =:=
+                                          state_changing
+                                  end, AllFeatureFlags),
+    ListStateChangingBody = erl_syntax:abstract(StateChangingFeatureFlags),
+    ListStateChangingClause = erl_syntax:clause(
+                                [erl_syntax:atom(state_changing)],
+                                [],
+                                [ListStateChangingBody]),
+    ListFun = erl_syntax:function(
+                erl_syntax:atom(list),
+                [ListAllClause,
+                 ListEnabledClause,
+                 ListDisabledClause,
+                 ListStateChangingClause]),
+    ListFunForm = erl_syntax:revert(ListFun),
+    %% states() -> ...
+    StatesBody = erl_syntax:abstract(FeatureStates),
+    StatesClause = erl_syntax:clause([], [], [StatesBody]),
+    StatesFun = erl_syntax:function(
+                  erl_syntax:atom(states),
+                  [StatesClause]),
+    StatesFunForm = erl_syntax:revert(StatesFun),
+    %% is_supported(_) -> ...
+    %% `true' for every known flag name, `false' for anything else.
+    IsSupportedClauses = [erl_syntax:clause(
+                            [erl_syntax:atom(FeatureName)],
+                            [],
+                            [erl_syntax:atom(true)])
+                          || FeatureName <- maps:keys(AllFeatureFlags)
+                         ],
+    NotSupportedClause = erl_syntax:clause(
+                           [erl_syntax:variable("_")],
+                           [],
+                           [erl_syntax:atom(false)]),
+    IsSupportedFun = erl_syntax:function(
+                       erl_syntax:atom(is_supported),
+                       IsSupportedClauses ++ [NotSupportedClause]),
+    IsSupportedFunForm = erl_syntax:revert(IsSupportedFun),
+    %% is_enabled(_) -> ...
+    %% Returns the recorded state (`true' or `state_changing') for a
+    %% known flag, `false' when no state is recorded or the flag is
+    %% unknown.
+    IsEnabledClauses = [erl_syntax:clause(
+                          [erl_syntax:atom(FeatureName)],
+                          [],
+                          [case maps:is_key(FeatureName, FeatureStates) of
+                               true ->
+                                   erl_syntax:atom(
+                                     maps:get(FeatureName, FeatureStates));
+                               false ->
+                                   erl_syntax:atom(false)
+                           end])
+                        || FeatureName <- maps:keys(AllFeatureFlags)
+                       ],
+    NotEnabledClause = erl_syntax:clause(
+                         [erl_syntax:variable("_")],
+                         [],
+                         [erl_syntax:atom(false)]),
+    IsEnabledFun = erl_syntax:function(
+                     erl_syntax:atom(is_enabled),
+                     IsEnabledClauses ++ [NotEnabledClause]),
+    IsEnabledFunForm = erl_syntax:revert(IsEnabledFun),
+    %% is_registry_initialized() -> ...
+    %% Always `true' in a generated registry: only the stub module
+    %% shipped on disk reports `false'.
+    IsInitializedClauses = [erl_syntax:clause(
+                              [],
+                              [],
+                              [erl_syntax:atom(true)])
+                           ],
+    IsInitializedFun = erl_syntax:function(
+                         erl_syntax:atom(is_registry_initialized),
+                         IsInitializedClauses),
+    IsInitializedFunForm = erl_syntax:revert(IsInitializedFun),
+    %% is_registry_written_to_disk() -> ...
+    IsWrittenToDiskClauses = [erl_syntax:clause(
+                                [],
+                                [],
+                                [erl_syntax:atom(WrittenToDisk)])
+                             ],
+    IsWrittenToDiskFun = erl_syntax:function(
+                           erl_syntax:atom(is_registry_written_to_disk),
+                           IsWrittenToDiskClauses),
+    IsWrittenToDiskFunForm = erl_syntax:revert(IsWrittenToDiskFun),
+    %% Compilation!
+    Forms = [ModuleForm,
+             ExportForm,
+             GetFunForm,
+             ListFunForm,
+             StatesFunForm,
+             IsSupportedFunForm,
+             IsEnabledFunForm,
+             IsInitializedFunForm,
+             IsWrittenToDiskFunForm],
+    maybe_log_registry_source_code(Forms),
+    CompileOpts = [return_errors,
+                   return_warnings],
+    case compile:forms(Forms, CompileOpts) of
+        {ok, Mod, Bin, _} ->
+            load_registry_mod(RegistryVsn, Mod, Bin);
+        {error, Errors, Warnings} ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: registry compilation:~n"
+              "Errors: ~p~n"
+              "Warnings: ~p",
+              [Errors, Warnings]),
+            {error, {compilation_failure, Errors, Warnings}}
+    end.
+
+%% Pretty-prints the generated registry source only when explicitly
+%% requested through the prelaunch context (a debugging aid).
+maybe_log_registry_source_code(Forms) ->
+    case rabbit_prelaunch:get_context() of
+        #{log_feature_flags_registry := true} ->
+            SourceCode = erl_prettypr:format(erl_syntax:form_list(Forms)),
+            rabbit_log_feature_flags:debug(
+              "== FEATURE FLAGS REGISTRY ==~n"
+              "~s~n"
+              "== END ==~n",
+              [SourceCode]);
+        _ ->
+            ok
+    end.
+
+-ifdef(TEST).
+%% Exposes the global lock ID used while reloading the registry, so
+%% testsuites can synchronize with (or block) a reload in progress.
+registry_loading_lock() -> ?FF_REGISTRY_LOADING_LOCK.
+-endif.
+
+-spec load_registry_mod(registry_vsn(), atom(), binary()) ->
+    ok | restart | no_return().
+%% @private
+%% Atomically replaces the currently loaded registry module with the
+%% freshly compiled one, under a global lock. Returns `restart' when
+%% another process swapped the registry while we were regenerating.
+
+load_registry_mod(RegistryVsn, Mod, Bin) ->
+    rabbit_log_feature_flags:debug(
+      "Feature flags: registry module ready, loading it (~p)...",
+      [self()]),
+    FakeFilename = "Compiled and loaded by " ?MODULE_STRING,
+    %% Time to load the new registry, replacing the old one. We use a
+    %% lock here to synchronize concurrent reloads.
+    global:set_lock(?FF_REGISTRY_LOADING_LOCK, [node()]),
+    rabbit_log_feature_flags:debug(
+      "Feature flags: acquired lock before reloading registry module (~p)",
+      [self()]),
+    %% We want to make sure that the old registry (not the one being
+    %% currently in use) is purged by the code server. It means no
+    %% process lingers on that old code.
+    %%
+    %% We use code:soft_purge() for that (meaning no process is killed)
+    %% and we wait in an infinite loop for that to succeed.
+    ok = purge_old_registry(Mod),
+    %% Now we can replace the currently loaded registry by the new one.
+    %% The code server takes care of marking the current registry as old
+    %% and load the new module in an atomic operation.
+    %%
+    %% Therefore there is no chance of a window where there is no
+    %% registry module available, causing the one on disk to be
+    %% reloaded.
+    %%
+    %% The vsn check detects a concurrent regen: if the loaded
+    %% registry's vsn is no longer the one we started from, someone
+    %% else won the race and the caller must restart its regen.
+    Ret = case registry_vsn() of
+              RegistryVsn -> code:load_binary(Mod, FakeFilename, Bin);
+              OtherVsn    -> {error, {restart, RegistryVsn, OtherVsn}}
+          end,
+    rabbit_log_feature_flags:debug(
+      "Feature flags: releasing lock after reloading registry module (~p)",
+      [self()]),
+    global:del_lock(?FF_REGISTRY_LOADING_LOCK, [node()]),
+    case Ret of
+        {module, _} ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: registry module loaded (vsn: ~p -> ~p)",
+              [RegistryVsn, registry_vsn()]),
+            ok;
+        {error, {restart, Expected, Current}} ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: another registry module was loaded in the "
+              "meantime (expected old vsn: ~p, current vsn: ~p); "
+              "restarting the regen",
+              [Expected, Current]),
+            restart;
+        {error, Reason} ->
+            %% code:load_binary/3 failed: without a working registry
+            %% the whole subsystem is unusable, so crash loudly.
+            rabbit_log_feature_flags:error(
+              "Feature flags: failed to load registry module: ~p",
+              [Reason]),
+            throw({feature_flag_registry_reload_failure, Reason})
+    end.
+
+-spec registry_vsn() -> registry_vsn().
+%% @private
+
+%% Returns the `vsn' attribute of the currently loaded registry
+%% module, or `undefined' when the attribute is absent.
+registry_vsn() ->
+    Attrs = rabbit_ff_registry:module_info(attributes),
+    case lists:keyfind(vsn, 1, Attrs) of
+        {vsn, Vsn} -> Vsn;
+        false      -> undefined
+    end.
+
+%% Purges the old code of `Mod' if the module is currently loaded; a
+%% module which is not loaded has nothing to purge.
+purge_old_registry(Mod) ->
+    case code:is_loaded(Mod) of
+        false     -> ok;
+        {file, _} -> do_purge_old_registry(Mod)
+    end.
+
+%% Retries code:soft_purge/1 until it succeeds, i.e. until no process
+%% runs the old code anymore. soft_purge/1 never kills processes.
+%% NOTE(review): this loop spins without sleeping between attempts;
+%% presumably lingering processes leave the old code quickly — confirm.
+do_purge_old_registry(Mod) ->
+    case code:soft_purge(Mod) of
+        true  -> ok;
+        false -> do_purge_old_registry(Mod)
+    end.
+
+%% -------------------------------------------------------------------
+%% Feature flags state storage.
+%% -------------------------------------------------------------------
+
+-spec ensure_enabled_feature_flags_list_file_exists() -> ok | {error, any()}.
+%% @private
+
+%% Creates an empty "enabled feature flags" file when none exists yet,
+%% so later reads and writes operate on an existing file.
+ensure_enabled_feature_flags_list_file_exists() ->
+    case filelib:is_regular(enabled_feature_flags_list_file()) of
+        true  -> ok;
+        false -> write_enabled_feature_flags_list([])
+    end.
+
+-spec read_enabled_feature_flags_list() ->
+    [feature_name()] | no_return().
+%% @private
+%% Like try_to_read_enabled_feature_flags_list/0, but converts a read
+%% error into an exception: callers of this variant cannot proceed
+%% without the list.
+
+read_enabled_feature_flags_list() ->
+    case try_to_read_enabled_feature_flags_list() of
+        {error, Reason} ->
+            File = enabled_feature_flags_list_file(),
+            throw({feature_flags_file_read_error, File, Reason});
+        Ret ->
+            Ret
+    end.
+
+-spec try_to_read_enabled_feature_flags_list() ->
+    [feature_name()] | {error, any()}.
+%% @private
+
+%% Reads the list of enabled feature flags from the on-disk file. The
+%% file is expected to contain a single Erlang term: the list itself.
+try_to_read_enabled_feature_flags_list() ->
+    File = enabled_feature_flags_list_file(),
+    case file:consult(File) of
+        {error, enoent} ->
+            %% A missing file simply means no feature flag was ever
+            %% enabled: treat it as an empty list.
+            [];
+        {error, Reason} = Error ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: failed to read the `feature_flags` "
+              "file at `~s`: ~s",
+              [File, file:format_error(Reason)]),
+            Error;
+        {ok, [List]} ->
+            List
+    end.
+
+-spec write_enabled_feature_flags_list([feature_name()]) ->
+    ok | no_return().
+%% @private
+
+%% Like try_to_write_enabled_feature_flags_list/1, but converts a
+%% write error into an exception.
+write_enabled_feature_flags_list(FeatureNames) ->
+    case try_to_write_enabled_feature_flags_list(FeatureNames) of
+        ok ->
+            ok;
+        {error, Reason} ->
+            throw({feature_flags_file_write_error,
+                   enabled_feature_flags_list_file(),
+                   Reason})
+    end.
+
+-spec try_to_write_enabled_feature_flags_list([feature_name()]) ->
+    ok | {error, any()}.
+%% @private
+%% Persists the sorted list of enabled feature flags to the on-disk
+%% file, carrying over previously-enabled flags which are no longer
+%% supported locally.
+
+try_to_write_enabled_feature_flags_list(FeatureNames) ->
+    %% Before writing the new file, we read the existing one. If there
+    %% are unknown feature flags in that file, we want to keep their
+    %% state, even though they are unsupported at this time. It could be
+    %% that a plugin was disabled in the meantime.
+    %%
+    %% FIXME: Lock this code to fix concurrent read/modify/write.
+    PreviouslyEnabled = case try_to_read_enabled_feature_flags_list() of
+                            {error, _} -> [];
+                            List       -> List
+                        end,
+    %% Keep unsupported-but-previously-enabled names on top of the
+    %% requested list.
+    FeatureNames1 = lists:foldl(
+                      fun(Name, Acc) ->
+                              case is_supported_locally(Name) of
+                                  true  -> Acc;
+                                  false -> [Name | Acc]
+                              end
+                      end, FeatureNames, PreviouslyEnabled),
+    FeatureNames2 = lists:sort(FeatureNames1),
+
+    File = enabled_feature_flags_list_file(),
+    Content = io_lib:format("~p.~n", [FeatureNames2]),
+    %% TODO: If we fail to write the file, we should spawn a process
+    %% to retry the operation.
+    case file:write_file(File, Content) of
+        ok ->
+            ok;
+        {error, Reason} = Error ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: failed to write the `feature_flags` "
+              "file at `~s`: ~s",
+              [File, file:format_error(Reason)]),
+            Error
+    end.
+
+-spec enabled_feature_flags_list_file() -> file:filename().
+%% @doc
+%% Returns the path to the file where the state of feature flags is stored.
+%%
+%% The location comes from the `feature_flags_file' key in the `rabbit'
+%% application environment; it must be set before this is called.
+%%
+%% @returns the path to the file.
+
+enabled_feature_flags_list_file() ->
+    case application:get_env(rabbit, feature_flags_file) of
+        undefined -> throw(feature_flags_file_not_set);
+        {ok, Val} -> Val
+    end.
+
+%% -------------------------------------------------------------------
+%% Feature flags management: enabling.
+%% -------------------------------------------------------------------
+
+-spec do_enable(feature_name()) -> ok | {error, any()} | no_return().
+%% @private
+%% Enables a feature flag cluster-wide: marks it `state_changing',
+%% enables its dependencies, runs its migration function, then marks
+%% it enabled. Any failure rolls the state back to disabled.
+
+do_enable(FeatureName) ->
+    %% We mark this feature flag as "state changing" before doing the
+    %% actual state change. We also take a global lock: this permits
+    %% to block callers asking about a feature flag changing state.
+    global:set_lock(?FF_STATE_CHANGE_LOCK),
+    Ret = case mark_as_enabled(FeatureName, state_changing) of
+              ok ->
+                  case enable_dependencies(FeatureName, true) of
+                      ok ->
+                          case run_migration_fun(FeatureName, enable) of
+                              ok ->
+                                  mark_as_enabled(FeatureName, true);
+                              {error, no_migration_fun} ->
+                                  %% A migration function is optional;
+                                  %% its absence is not a failure.
+                                  mark_as_enabled(FeatureName, true);
+                              Error ->
+                                  Error
+                          end;
+                      Error ->
+                          Error
+                  end;
+              Error ->
+                  Error
+          end,
+    %% On any failure, roll the flag back to disabled before
+    %% releasing the lock, so no `state_changing' state leaks out.
+    case Ret of
+        ok -> ok;
+        _  -> mark_as_enabled(FeatureName, false)
+    end,
+    global:del_lock(?FF_STATE_CHANGE_LOCK),
+    Ret.
+
+-spec enable_locally(feature_name()) -> ok | {error, any()} | no_return().
+%% @private
+
+%% Enables a feature flag on this node only, unless it is already
+%% enabled (then this is a no-op). Used during feature flag states
+%% synchronization.
+enable_locally(FeatureName) when is_atom(FeatureName) ->
+    case is_enabled(FeatureName) of
+        false ->
+            rabbit_log_feature_flags:debug(
+              "Feature flag `~s`: enable locally (as part of feature "
+              "flag states synchronization)",
+              [FeatureName]),
+            do_enable_locally(FeatureName);
+        true ->
+            ok
+    end.
+
+-spec do_enable_locally(feature_name()) -> ok | {error, any()} | no_return().
+%% @private
+
+%% Node-local counterpart of do_enable/1: enables dependencies, then
+%% runs the migration function (a missing migration function is fine),
+%% and finally records the flag as enabled on this node.
+do_enable_locally(FeatureName) ->
+    case enable_dependencies(FeatureName, false) of
+        ok ->
+            case run_migration_fun(FeatureName, enable) of
+                Ret when Ret =:= ok orelse
+                         Ret =:= {error, no_migration_fun} ->
+                    mark_as_enabled_locally(FeatureName, true);
+                Error ->
+                    Error
+            end;
+        Error ->
+            Error
+    end.
+
+-spec enable_dependencies(feature_name(), boolean()) ->
+    ok | {error, any()} | no_return().
+%% @private
+
+%% Enables all feature flags listed in this flag's `depends_on'
+%% property (defaults to none), cluster-wide or locally depending on
+%% `Everywhere'.
+enable_dependencies(FeatureName, Everywhere) ->
+    DependsOn = maps:get(depends_on,
+                         rabbit_ff_registry:get(FeatureName),
+                         []),
+    rabbit_log_feature_flags:debug(
+      "Feature flag `~s`: enable dependencies: ~p",
+      [FeatureName, DependsOn]),
+    enable_dependencies(FeatureName, DependsOn, Everywhere).
+
+-spec enable_dependencies(feature_name(), [feature_name()], boolean()) ->
+    ok | {error, any()} | no_return().
+%% @private
+
+%% Walks the dependency list one flag at a time, stopping at the first
+%% failure and returning it.
+enable_dependencies(_TopLevelFeatureName, [], _Everywhere) ->
+    ok;
+enable_dependencies(TopLevelFeatureName, [Dep | Deps], Everywhere) ->
+    Ret = case Everywhere of
+              true  -> enable(Dep);
+              false -> enable_locally(Dep)
+          end,
+    case Ret of
+        ok    -> enable_dependencies(TopLevelFeatureName, Deps, Everywhere);
+        Error -> Error
+    end.
+
+-spec run_migration_fun(feature_name(), any()) ->
+    any() | {error, any()}.
+%% @private
+%% Runs the feature flag's `migration_fun' (an `{Mod, Fun}' pair from
+%% its properties) with `Arg'; returns `{error, no_migration_fun}'
+%% when the flag declares none.
+
+run_migration_fun(FeatureName, Arg) ->
+    FeatureProps = rabbit_ff_registry:get(FeatureName),
+    run_migration_fun(FeatureName, FeatureProps, Arg).
+
+run_migration_fun(FeatureName, FeatureProps, Arg) ->
+    case maps:get(migration_fun, FeatureProps, none) of
+        {MigrationMod, MigrationFun}
+          when is_atom(MigrationMod) andalso is_atom(MigrationFun) ->
+            rabbit_log_feature_flags:debug(
+              "Feature flag `~s`: run migration function ~p with arg: ~p",
+              [FeatureName, MigrationFun, Arg]),
+            %% The migration function is external code: contain any
+            %% crash and turn it into an error tuple for the caller.
+            try
+                erlang:apply(MigrationMod,
+                             MigrationFun,
+                             [FeatureName, FeatureProps, Arg])
+            catch
+                _:Reason:Stacktrace ->
+                    rabbit_log_feature_flags:error(
+                      "Feature flag `~s`: migration function crashed: ~p~n~p",
+                      [FeatureName, Reason, Stacktrace]),
+                    {error, {migration_fun_crash, Reason, Stacktrace}}
+            end;
+        none ->
+            {error, no_migration_fun};
+        Invalid ->
+            rabbit_log_feature_flags:error(
+              "Feature flag `~s`: invalid migration function: ~p",
+              [FeatureName, Invalid]),
+            {error, {invalid_migration_fun, Invalid}}
+    end.
+
+-spec mark_as_enabled(feature_name(), feature_state()) ->
+    any() | {error, any()} | no_return().
+%% @private
+
+%% Records the new state locally first; only on success is the change
+%% propagated to the other running cluster members.
+mark_as_enabled(FeatureName, IsEnabled) ->
+    LocalRet = mark_as_enabled_locally(FeatureName, IsEnabled),
+    case LocalRet of
+        ok    -> mark_as_enabled_remotely(FeatureName, IsEnabled);
+        Error -> Error
+    end.
+
+-spec mark_as_enabled_locally(feature_name(), feature_state()) ->
+    any() | {error, any()} | no_return().
+%% @private
+%% Records the flag's new state on this node: updates the on-disk list
+%% of enabled flags if it changed, then rebuilds the registry with the
+%% new state and the outcome of the disk write.
+
+mark_as_enabled_locally(FeatureName, IsEnabled) ->
+    rabbit_log_feature_flags:info(
+      "Feature flag `~s`: mark as enabled=~p",
+      [FeatureName, IsEnabled]),
+    EnabledFeatureNames = maps:keys(list(enabled)),
+    %% `state_changing' does not affect the persisted list: only fully
+    %% enabled flags are written to disk.
+    NewEnabledFeatureNames = case IsEnabled of
+                                 true ->
+                                     [FeatureName | EnabledFeatureNames];
+                                 false ->
+                                     EnabledFeatureNames -- [FeatureName];
+                                 state_changing ->
+                                     EnabledFeatureNames
+                             end,
+    %% If the list is unchanged, keep the previous "written to disk"
+    %% status; otherwise record whether this write succeeded.
+    WrittenToDisk = case NewEnabledFeatureNames of
+                        EnabledFeatureNames ->
+                            rabbit_ff_registry:is_registry_written_to_disk();
+                        _ ->
+                            ok =:= try_to_write_enabled_feature_flags_list(
+                                     NewEnabledFeatureNames)
+                    end,
+    initialize_registry(#{},
+                        #{FeatureName => IsEnabled},
+                        WrittenToDisk).
+
+-spec mark_as_enabled_remotely(feature_name(), feature_state()) ->
+    any() | {error, any()} | no_return().
+%% @private
+
+%% Propagates the new state to every running cluster member, using the
+%% default RPC timeout.
+mark_as_enabled_remotely(FeatureName, IsEnabled) ->
+    mark_as_enabled_remotely(running_remote_nodes(), FeatureName,
+                             IsEnabled, ?TIMEOUT).
+
+-spec mark_as_enabled_remotely([node()],
+                               feature_name(),
+                               feature_state(),
+                               timeout()) ->
+    any() | {error, any()} | no_return().
+%% @private
+%% RPCs mark_as_enabled_locally/2 on every given node, then retries
+%% the failed ones until success or until the overall time budget
+%% (`Timeout', in milliseconds) is exhausted, at which point it throws.
+
+mark_as_enabled_remotely([], _FeatureName, _IsEnabled, _Timeout) ->
+    ok;
+mark_as_enabled_remotely(Nodes, FeatureName, IsEnabled, Timeout) ->
+    T0 = erlang:timestamp(),
+    Rets = [{Node, rpc:call(Node,
+                            ?MODULE,
+                            mark_as_enabled_locally,
+                            [FeatureName, IsEnabled],
+                            Timeout)}
+            || Node <- Nodes],
+    FailedNodes = [Node || {Node, Ret} <- Rets, Ret =/= ok],
+    case FailedNodes of
+        [] ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: `~s` successfully marked as enabled=~p on all "
+              "nodes", [FeatureName, IsEnabled]),
+            ok;
+        _ ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: failed to mark feature flag `~s` as enabled=~p "
+              "on the following nodes:", [FeatureName, IsEnabled]),
+            [rabbit_log_feature_flags:error(
+               "Feature flags: - ~s: ~p",
+               [Node, Ret])
+             || {Node, Ret} <- Rets,
+                Ret =/= ok],
+            Sleep = 1000,
+            T1 = erlang:timestamp(),
+            %% Duration is in microseconds (timer:now_diff/2); the new
+            %% timeout is the remaining budget in milliseconds, minus
+            %% the sleep we are about to take.
+            Duration = timer:now_diff(T1, T0),
+            NewTimeout = (Timeout * 1000 - Duration) div 1000 - Sleep,
+            if
+                NewTimeout > 0 ->
+                    rabbit_log_feature_flags:debug(
+                      "Feature flags: retrying with a timeout of ~b "
+                      "ms after sleeping for ~b ms",
+                      [NewTimeout, Sleep]),
+                    timer:sleep(Sleep),
+                    mark_as_enabled_remotely(FailedNodes,
+                                             FeatureName,
+                                             IsEnabled,
+                                             NewTimeout);
+                true ->
+                    rabbit_log_feature_flags:debug(
+                      "Feature flags: not retrying; RPC went over the "
+                      "~b milliseconds timeout", [Timeout]),
+                    %% FIXME: Is crashing the process the best solution here?
+                    throw(
+                      {failed_to_mark_feature_flag_as_enabled_on_remote_nodes,
+                       FeatureName, IsEnabled, FailedNodes})
+            end
+    end.
+
+%% -------------------------------------------------------------------
+%% Coordination with remote nodes.
+%% -------------------------------------------------------------------
+
+-spec remote_nodes() -> [node()].
+%% @private
+
+%% All members of the Mnesia cluster (running or not), excluding the
+%% local node.
+remote_nodes() ->
+    lists:delete(node(), mnesia:system_info(db_nodes)).
+
+-spec running_remote_nodes() -> [node()].
+%% @private
+
+%% The currently running members of the Mnesia cluster, excluding the
+%% local node.
+running_remote_nodes() ->
+    lists:delete(node(), mnesia:system_info(running_db_nodes)).
+
+-spec query_running_remote_nodes(node(), timeout()) ->
+    [node()] | {badrpc, term()}.
+%% @private
+
+%% Asks `Node' for its view of the running cluster members (minus the
+%% local node); `{badrpc, _}' errors are passed through untouched.
+query_running_remote_nodes(Node, Timeout) ->
+    case rpc:call(Node, mnesia, system_info, [running_db_nodes], Timeout) of
+        {badrpc, _} = Error -> Error;
+        Nodes               -> lists:delete(node(), Nodes)
+    end.
+
+-spec does_node_support(node(), [feature_name()], timeout()) -> boolean().
+%% @private
+%% Returns whether `Node' supports all the given feature flags. Any
+%% error (including talking to a pre-feature-flags 3.7.x node) is
+%% treated as "not supported".
+
+does_node_support(Node, FeatureNames, Timeout) ->
+    rabbit_log_feature_flags:debug(
+      "Feature flags: querying `~p` support on node ~s...",
+      [FeatureNames, Node]),
+    %% Avoid the RPC round-trip when asking about the local node.
+    Ret = case node() of
+              Node ->
+                  is_supported_locally(FeatureNames);
+              _ ->
+                  run_feature_flags_mod_on_remote_node(
+                    Node, is_supported_locally, [FeatureNames], Timeout)
+          end,
+    case Ret of
+        {error, pre_feature_flags_rabbitmq} ->
+            %% See run_feature_flags_mod_on_remote_node/4 for
+            %% an explanation why we consider this node a 3.7.x
+            %% pre-feature-flags node.
+            rabbit_log_feature_flags:debug(
+              "Feature flags: no feature flags support on node `~s`, "
+              "consider the feature flags unsupported: ~p",
+              [Node, FeatureNames]),
+            false;
+        {error, Reason} ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: error while querying `~p` support on "
+              "node ~s: ~p",
+              [FeatureNames, Node, Reason]),
+            false;
+        true ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: node `~s` supports `~p`",
+              [Node, FeatureNames]),
+            true;
+        false ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: node `~s` does not support `~p`; "
+              "stopping query here",
+              [Node, FeatureNames]),
+            false
+    end.
+
+-spec check_node_compatibility(node()) -> ok | {error, any()}.
+%% @doc
+%% Checks if a node is compatible with the local node.
+%%
+%% To be compatible, the following two conditions must be met:
+%% <ol>
+%% <li>feature flags enabled on the local node must be supported by the
+%%   remote node</li>
+%% <li>feature flags enabled on the remote node must be supported by the
+%%   local node</li>
+%% </ol>
+%%
+%% This variant uses the default RPC timeout (`?TIMEOUT').
+%%
+%% @param Node the name of the remote node to test.
+%% @returns `ok' if they are compatible, `{error, Reason}' if they are not.
+%%
+%% @see check_node_compatibility/2
+
+check_node_compatibility(Node) ->
+    check_node_compatibility(Node, ?TIMEOUT).
+
+-spec check_node_compatibility(node(), timeout()) -> ok | {error, any()}.
+%% @doc
+%% Checks if a node is compatible with the local node.
+%%
+%% See {@link check_node_compatibility/1} for the conditions required to
+%% consider two nodes compatible.
+%%
+%% @param Node the name of the remote node to test.
+%% @param Timeout Time in milliseconds after which the RPC gives up.
+%% @returns `ok' if they are compatible, `{error, Reason}' if they are not.
+%%
+%% @see check_node_compatibility/1
+
+check_node_compatibility(Node, Timeout) ->
+    %% Before checking compatibility, we exchange feature flags from
+    %% unknown Erlang applications. So we fetch remote feature flags
+    %% from applications which are not loaded locally, and the opposite.
+    %%
+    %% The goal is that such feature flags are not blocking the
+    %% communication between nodes because the code (which would
+    %% break) is missing on those nodes. Therefore they should not be
+    %% considered when determining compatibility.
+    %%
+    %% NOTE(review): the return value of this exchange is ignored, so
+    %% a failed exchange silently degrades the check — confirm this is
+    %% intentional.
+    exchange_feature_flags_from_unknown_apps(Node, Timeout),
+
+    %% FIXME:
+    %% When we try to cluster two nodes, we get:
+    %%   Feature flags: starting an unclustered node: all feature flags
+    %%   will be enabled by default
+    %% It should probably not be the case...
+
+    %% We can now proceed with the actual compatibility check.
+    rabbit_log_feature_flags:debug(
+      "Feature flags: node `~s` compatibility check, part 1/2",
+      [Node]),
+    Part1 = local_enabled_feature_flags_is_supported_remotely(Node, Timeout),
+    rabbit_log_feature_flags:debug(
+      "Feature flags: node `~s` compatibility check, part 2/2",
+      [Node]),
+    Part2 = remote_enabled_feature_flags_is_supported_locally(Node, Timeout),
+    case {Part1, Part2} of
+        {true, true} ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: node `~s` is compatible",
+              [Node]),
+            ok;
+        {false, _} ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: node `~s` is INCOMPATIBLE: "
+              "feature flags enabled locally are not supported remotely",
+              [Node]),
+            {error, incompatible_feature_flags};
+        {_, false} ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: node `~s` is INCOMPATIBLE: "
+              "feature flags enabled remotely are not supported locally",
+              [Node]),
+            {error, incompatible_feature_flags}
+    end.
+
+-spec is_node_compatible(node()) -> boolean().
+%% @doc
+%% Returns if a node is compatible with the local node.
+%%
+%% This function calls {@link check_node_compatibility/2} and returns
+%% `true' when the latter returns `ok'. Therefore this is the same code,
+%% except that this function returns a boolean, but not the reason of
+%% the incompatibility if any.
+%%
+%% @param Node the name of the remote node to test.
+%% @returns `true' if they are compatible, `false' otherwise.
+
+is_node_compatible(Node) ->
+    is_node_compatible(Node, ?TIMEOUT).
+
+-spec is_node_compatible(node(), timeout()) -> boolean().
+%% @doc
+%% Returns if a node is compatible with the local node.
+%%
+%% This is {@link check_node_compatibility/2} reduced to a boolean:
+%% the reason for an incompatibility is discarded. If the RPC times
+%% out, the nodes are considered incompatible.
+%%
+%% @param Node the name of the remote node to test.
+%% @param Timeout Time in milliseconds after which the RPC gives up.
+%% @returns `true' if they are compatible, `false' otherwise.
+
+is_node_compatible(Node, Timeout) ->
+    case check_node_compatibility(Node, Timeout) of
+        ok -> true;
+        _  -> false
+    end.
+
+-spec local_enabled_feature_flags_is_supported_remotely(node(),
+                                                        timeout()) ->
+    boolean().
+%% @private
+
+%% Checks that every feature flag enabled on this node is supported by
+%% `Node'.
+local_enabled_feature_flags_is_supported_remotely(Node, Timeout) ->
+    EnabledLocally = maps:keys(list(enabled)),
+    is_supported_remotely([Node], EnabledLocally, Timeout).
+
+-spec remote_enabled_feature_flags_is_supported_locally(node(),
+                                                        timeout()) ->
+    boolean().
+%% @private
+
+%% Checks that every feature flag enabled on `Node' is supported
+%% locally; failing to query the remote list counts as unsupported.
+remote_enabled_feature_flags_is_supported_locally(Node, Timeout) ->
+    case query_remote_feature_flags(Node, enabled, Timeout) of
+        RemoteEnabledFeatureFlags when is_map(RemoteEnabledFeatureFlags) ->
+            is_supported_locally(maps:keys(RemoteEnabledFeatureFlags));
+        {error, _} ->
+            false
+    end.
+
+-spec run_feature_flags_mod_on_remote_node(node(),
+                                           atom(),
+                                           [term()],
+                                           timeout()) ->
+    term() | {error, term()}.
+%% @private
+%% RPCs `?MODULE:Function(Args...)' on `Node'. An `undef' crash of
+%% that exact function is translated into
+%% `{error, pre_feature_flags_rabbitmq}'; other RPC failures become
+%% `{error, {badrpc, ...}}'.
+
+run_feature_flags_mod_on_remote_node(Node, Function, Args, Timeout) ->
+    case rpc:call(Node, ?MODULE, Function, Args, Timeout) of
+        {badrpc, {'EXIT',
+                  {undef,
+                   [{?MODULE, Function, Args, []}
+                    | _]}}} ->
+            %% If rabbit_feature_flags:Function() is undefined
+            %% on the remote node, we consider it to be a 3.7.x
+            %% pre-feature-flags node.
+            %%
+            %% Theoretically, it could be an older version (3.6.x and
+            %% older). But the RabbitMQ version consistency check
+            %% (rabbit_misc:version_minor_equivalent/2) called from
+            %% rabbit_mnesia:check_rabbit_consistency/2 already blocked
+            %% this situation from happening before we reach this point.
+            rabbit_log_feature_flags:debug(
+              "Feature flags: ~s:~s~p unavailable on node `~s`: "
+              "assuming it is a RabbitMQ 3.7.x pre-feature-flags node",
+              [?MODULE, Function, Args, Node]),
+            {error, pre_feature_flags_rabbitmq};
+        {badrpc, Reason} = Error ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: error while running ~s:~s~p "
+              "on node `~s`: ~p",
+              [?MODULE, Function, Args, Node, Reason]),
+            %% Note: the whole `{badrpc, Reason}' tuple is wrapped, so
+            %% callers see `{error, {badrpc, Reason}}'.
+            {error, Error};
+        Ret ->
+            Ret
+    end.
+
+-spec query_remote_feature_flags(node(),
+                                 Which :: all | enabled | disabled,
+                                 timeout()) ->
+    feature_flags() | {error, any()}.
+%% @private
+%% Fetches the requested category of feature flags from `Node'. A
+%% pre-feature-flags 3.7.x node yields an empty map; other errors are
+%% returned as-is.
+
+query_remote_feature_flags(Node, Which, Timeout) ->
+    rabbit_log_feature_flags:debug(
+      "Feature flags: querying ~s feature flags on node `~s`...",
+      [Which, Node]),
+    case run_feature_flags_mod_on_remote_node(Node, list, [Which], Timeout) of
+        {error, pre_feature_flags_rabbitmq} ->
+            %% See run_feature_flags_mod_on_remote_node/4 for
+            %% an explanation why we consider this node a 3.7.x
+            %% pre-feature-flags node.
+            rabbit_log_feature_flags:debug(
+              "Feature flags: no feature flags support on node `~s`, "
+              "consider the list of feature flags empty", [Node]),
+            #{};
+        {error, Reason} = Error ->
+            rabbit_log_feature_flags:error(
+              "Feature flags: error while querying ~s feature flags "
+              "on node `~s`: ~p",
+              [Which, Node, Reason]),
+            Error;
+        RemoteFeatureFlags when is_map(RemoteFeatureFlags) ->
+            RemoteFeatureNames = maps:keys(RemoteFeatureFlags),
+            rabbit_log_feature_flags:debug(
+              "Feature flags: querying ~s feature flags on node `~s` "
+              "done; ~s features: ~p",
+              [Which, Node, Which, RemoteFeatureNames]),
+            RemoteFeatureFlags
+    end.
+
+-spec merge_feature_flags_from_unknown_apps(feature_flags()) ->
+    ok | {error, any()}.
+%% @private
+%% @doc Registers the subset of `FeatureFlags' which is neither already
+%% supported locally nor provided by an application loaded on this node.
+%% Returns `ok' when there is nothing new to register.
+
+merge_feature_flags_from_unknown_apps(FeatureFlags)
+  when is_map(FeatureFlags) ->
+    LoadedApps = [App || {App, _, _} <- application:loaded_applications()],
+    %% A feature flag is "known" if it is supported locally or if the
+    %% application providing it is loaded here (it will be discovered
+    %% through the normal scan in that case).
+    Known = fun(FeatureName, FeatureProps) ->
+                    is_supported_locally(FeatureName)
+                    orelse lists:member(maps:get(provided_by, FeatureProps),
+                                        LoadedApps)
+            end,
+    FeatureFlagsFromUnknownApps =
+        maps:filter(
+          fun(FeatureName, FeatureProps) ->
+                  not Known(FeatureName, FeatureProps)
+          end,
+          FeatureFlags),
+    case map_size(FeatureFlagsFromUnknownApps) of
+        0 ->
+            ok;
+        _ ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: register feature flags provided by applications "
+              "unknown locally: ~p",
+              [maps:keys(FeatureFlagsFromUnknownApps)]),
+            initialize_registry(FeatureFlagsFromUnknownApps)
+    end.
+
+%% @private
+%% @doc Exchanges feature flags provided by applications unknown on one
+%% side: first pulls remote-only flags locally, then pushes local-only
+%% flags to the other cluster members.
+exchange_feature_flags_from_unknown_apps(Node, Timeout) ->
+    %% The first step is to fetch feature flags from Erlang applications
+    %% we don't know locally (they are loaded remotely, but not
+    %% locally).
+    %%
+    %% NOTE(review): the return value of this first step is discarded,
+    %% so a failed fetch does not prevent the push below — confirm this
+    %% best-effort behavior is intended.
+    fetch_remote_feature_flags_from_apps_unknown_locally(Node, Timeout),
+
+    %% The next step is to do the opposite: push feature flags to remote
+    %% nodes so they can register those from applications they don't
+    %% know.
+    push_local_feature_flags_from_apps_unknown_remotely(Node, Timeout).
+
+%% @private
+%% @doc Fetches all feature flags known to `Node' and registers locally
+%% those provided by applications unknown on this node.
+%%
+%% If the remote query fails, the error is returned to the caller.
+%% Previously the `{error, _}' tuple was passed straight to
+%% merge_feature_flags_from_unknown_apps/1, whose `is_map/1' guard made
+%% the call crash with a `function_clause' error.
+fetch_remote_feature_flags_from_apps_unknown_locally(Node, Timeout) ->
+    case query_remote_feature_flags(Node, all, Timeout) of
+        {error, _} = Error ->
+            Error;
+        RemoteFeatureFlags when is_map(RemoteFeatureFlags) ->
+            merge_feature_flags_from_unknown_apps(RemoteFeatureFlags)
+    end.
+
+%% @private
+%% @doc Pushes every feature flag known locally to the cluster `Node'
+%% belongs to, so remote nodes can register the ones they do not know.
+push_local_feature_flags_from_apps_unknown_remotely(Node, Timeout) ->
+    push_local_feature_flags_from_apps_unknown_remotely(
+      Node, list(all), Timeout).
+
+%% @private
+%% @doc Asks every running node of the cluster `Node' belongs to, to
+%% merge the given feature flags. A no-op when the map is empty.
+%%
+%% NOTE(review): per-node results of the remote merge are discarded by
+%% lists:foreach/2, so individual node failures are not reported here —
+%% confirm this best-effort behavior is intended.
+push_local_feature_flags_from_apps_unknown_remotely(
+  Node, FeatureFlags, Timeout)
+  when map_size(FeatureFlags) > 0 ->
+    case query_running_remote_nodes(Node, Timeout) of
+        {badrpc, Reason} ->
+            {error, Reason};
+        Nodes ->
+            lists:foreach(
+              fun(N) ->
+                      run_feature_flags_mod_on_remote_node(
+                        N,
+                        merge_feature_flags_from_unknown_apps,
+                        [FeatureFlags],
+                        Timeout)
+              end, Nodes)
+    end;
+push_local_feature_flags_from_apps_unknown_remotely(_, _, _) ->
+    ok.
+
+-spec sync_feature_flags_with_cluster([node()], boolean()) ->
+    ok | {error, any()} | no_return().
+%% @private
+%% @doc Convenience wrapper around sync_feature_flags_with_cluster/3
+%% using the module's default ?TIMEOUT.
+
+sync_feature_flags_with_cluster(Nodes, NodeIsVirgin) ->
+    sync_feature_flags_with_cluster(Nodes, NodeIsVirgin, ?TIMEOUT).
+
+-spec sync_feature_flags_with_cluster([node()], boolean(), timeout()) ->
+    ok | {error, any()} | no_return().
+%% @private
+%% @doc Synchronizes feature flag states with the cluster.
+%%
+%% With an empty node list (unclustered node): a virgin node enables
+%% all feature flags by default, or only the forced subset from
+%% `$RABBITMQ_FEATURE_FLAGS' / configuration; an already-initialized
+%% node keeps its current state. With a non-empty node list, the
+%% states are synchronized with one remote node.
+
+sync_feature_flags_with_cluster([], NodeIsVirgin, _) ->
+    verify_which_feature_flags_are_actually_enabled(),
+    case NodeIsVirgin of
+        true ->
+            %% `undefined' means "no forced list": enable everything.
+            FeatureNames = get_forced_feature_flag_names(),
+            case remote_nodes() of
+                [] when FeatureNames =:= undefined ->
+                    rabbit_log_feature_flags:debug(
+                      "Feature flags: starting an unclustered node "
+                      "for the first time: all feature flags will be "
+                      "enabled by default"),
+                    enable_all();
+                [] ->
+                    case FeatureNames of
+                        [] ->
+                            rabbit_log_feature_flags:debug(
+                              "Feature flags: starting an unclustered "
+                              "node for the first time: all feature "
+                              "flags are forcibly left disabled from "
+                              "the $RABBITMQ_FEATURE_FLAGS environment "
+                              "variable"),
+                            ok;
+                        _ ->
+                            rabbit_log_feature_flags:debug(
+                              "Feature flags: starting an unclustered "
+                              "node for the first time: only the "
+                              "following feature flags specified in "
+                              "the $RABBITMQ_FEATURE_FLAGS environment "
+                              "variable will be enabled: ~p",
+                              [FeatureNames]),
+                            enable(FeatureNames)
+                    end;
+                _ ->
+                    %% There are remote nodes: nothing to force here.
+                    ok
+            end;
+        false ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: starting an unclustered node which is "
+              "already initialized: all feature flags left in their "
+              "current state"),
+            ok
+    end;
+sync_feature_flags_with_cluster(Nodes, _, Timeout) ->
+    verify_which_feature_flags_are_actually_enabled(),
+    RemoteNodes = Nodes -- [node()],
+    sync_feature_flags_with_cluster1(RemoteNodes, Timeout).
+
+%% @private
+%% @doc First half of the clustered sync: picks one random remote node,
+%% queries its enabled feature flags and enables them locally, then
+%% proceeds with the reverse direction in
+%% sync_feature_flags_with_cluster2/2.
+sync_feature_flags_with_cluster1([], _) ->
+    ok;
+sync_feature_flags_with_cluster1(RemoteNodes, Timeout) ->
+    RandomRemoteNode = pick_one_node(RemoteNodes),
+    rabbit_log_feature_flags:debug(
+      "Feature flags: SYNCING FEATURE FLAGS with node `~s`...",
+      [RandomRemoteNode]),
+    case query_remote_feature_flags(RandomRemoteNode, enabled, Timeout) of
+        {error, _} = Error ->
+            Error;
+        RemoteFeatureFlags ->
+            RemoteFeatureNames = maps:keys(RemoteFeatureFlags),
+            rabbit_log_feature_flags:debug(
+              "Feature flags: enabling locally feature flags already "
+              "enabled on node `~s`...",
+              [RandomRemoteNode]),
+            case do_sync_feature_flags_with_node(RemoteFeatureNames) of
+                ok ->
+                    sync_feature_flags_with_cluster2(
+                      RandomRemoteNode, Timeout);
+                Error ->
+                    Error
+            end
+    end.
+
+%% @private
+%% @doc Second half of the clustered sync: asks the chosen remote node
+%% to enable the feature flags already enabled locally. A remote node
+%% without feature flags support is treated as a success.
+sync_feature_flags_with_cluster2(RandomRemoteNode, Timeout) ->
+    LocalFeatureNames = maps:keys(list(enabled)),
+    rabbit_log_feature_flags:debug(
+      "Feature flags: enabling on node `~s` feature flags already "
+      "enabled locally...",
+      [RandomRemoteNode]),
+    Ret = run_feature_flags_mod_on_remote_node(
+            RandomRemoteNode,
+            do_sync_feature_flags_with_node,
+            [LocalFeatureNames],
+            Timeout),
+    case Ret of
+        %% A pre-feature-flags node cannot enable anything: not an error.
+        {error, pre_feature_flags_rabbitmq} -> ok;
+        _                                   -> Ret
+    end.
+
+%% @private
+%% @doc Returns a uniformly-random element of the non-empty `Nodes' list.
+pick_one_node(Nodes) ->
+    lists:nth(rand:uniform(length(Nodes)), Nodes).
+
+%% @private
+%% @doc Enables each of the given feature flags locally, in order,
+%% stopping at and returning the first error encountered. Called both
+%% locally and over RPC by peers (name/arity are part of the protocol).
+do_sync_feature_flags_with_node([]) ->
+    ok;
+do_sync_feature_flags_with_node([FeatureName | Rest]) ->
+    case enable_locally(FeatureName) of
+        ok    -> do_sync_feature_flags_with_node(Rest);
+        Error -> Error
+    end.
+
+-spec get_forced_feature_flag_names() -> [feature_name()] | undefined.
+%% @private
+%% @doc
+%% Returns the (possibly empty) list of feature flags the user want
+%% to enable out-of-the-box when starting a node for the first time.
+%%
+%% Without this, the default is to enable all the supported feature
+%% flags.
+%%
+%% There are two ways to specify that list:
+%% <ol>
+%% <li>Using the `$RABBITMQ_FEATURE_FLAGS' environment variable; for
+%%   instance `RABBITMQ_FEATURE_FLAGS=quorum_queue,mnevis'.</li>
+%% <li>Using the `forced_feature_flags_on_init' configuration parameter;
+%%   for instance
+%%   `{rabbit, [{forced_feature_flags_on_init, [quorum_queue, mnevis]}]}'.</li>
+%% </ol>
+%%
+%% The environment variable has precedence over the configuration
+%% parameter.
+
+get_forced_feature_flag_names() ->
+    Forced = case get_forced_feature_flag_names_from_env() of
+                 undefined -> get_forced_feature_flag_names_from_config();
+                 FromEnv   -> FromEnv
+             end,
+    log_forced_feature_flag_names(Forced),
+    Forced.
+
+%% Logs what the forced list means; `undefined' (no forcing) is silent.
+log_forced_feature_flag_names(undefined) ->
+    ok;
+log_forced_feature_flag_names([]) ->
+    rabbit_log_feature_flags:info(
+      "Feature flags: automatic enablement of feature "
+      "flags disabled (i.e. none will be enabled "
+      "automatically)");
+log_forced_feature_flag_names(FeatureNames) ->
+    rabbit_log_feature_flags:info(
+      "Feature flags: automatic enablement of feature "
+      "flags limited to the following list: ~p", [FeatureNames]).
+
+-spec get_forced_feature_flag_names_from_env() -> [feature_name()] | undefined.
+%% @private
+%% @doc Reads the forced feature flag list from the prelaunch context
+%% (populated from `$RABBITMQ_FEATURE_FLAGS'); `undefined' when unset.
+
+get_forced_feature_flag_names_from_env() ->
+    case rabbit_prelaunch:get_context() of
+        #{forced_feature_flags_on_init := ForcedFFs}
+          when is_list(ForcedFFs) ->
+            ForcedFFs;
+        _ ->
+            undefined
+    end.
+
+-spec get_forced_feature_flag_names_from_config() -> [feature_name()] | undefined.
+%% @private
+%% @doc Reads the forced feature flag list from the
+%% `forced_feature_flags_on_init' application environment variable.
+%% Any value which is not a list of atoms yields `undefined'.
+
+get_forced_feature_flag_names_from_config() ->
+    case application:get_env(rabbit, forced_feature_flags_on_init) of
+        {ok, List) = _ when false -> undefined; %% unreachable; see below
+        {ok, List} when is_list(List) ->
+            case lists:all(fun is_atom/1, List) of
+                true  -> List;
+                false -> undefined
+            end;
+        _ ->
+            undefined
+    end.
+
+-spec verify_which_feature_flags_are_actually_enabled() ->
+    ok | {error, any()} | no_return().
+%% @private
+%% @doc Cross-checks the on-disk list of enabled feature flags against
+%% each flag's own `is_enabled' migration query, logs any differences,
+%% and rewrites the list plus the registry when a repair was needed.
+
+verify_which_feature_flags_are_actually_enabled() ->
+    AllFeatureFlags = list(all),
+    EnabledFeatureNames = read_enabled_feature_flags_list(),
+    rabbit_log_feature_flags:debug(
+      "Feature flags: double-checking feature flag states..."),
+    %% In case the previous instance of the node failed to write the
+    %% feature flags list file, we want to double-check the list of
+    %% enabled feature flags read from disk. For each feature flag,
+    %% we call the migration function to query if the feature flag is
+    %% actually enabled.
+    %%
+    %% If a feature flag doesn't provide a migration function (or if the
+    %% function fails), we keep the current state of the feature flag.
+    List1 = maps:fold(
+              fun(Name, Props, Acc) ->
+                      Ret = run_migration_fun(Name, Props, is_enabled),
+                      case Ret of
+                          true ->
+                              [Name | Acc];
+                          false ->
+                              Acc;
+                          _ ->
+                              %% No migration function or it failed:
+                              %% fall back to the registry's view.
+                              MarkedAsEnabled = is_enabled(Name),
+                              case MarkedAsEnabled of
+                                  true  -> [Name | Acc];
+                                  false -> Acc
+                              end
+                      end
+              end,
+              [], AllFeatureFlags),
+    RepairedEnabledFeatureNames = lists:sort(List1),
+    %% We log the list of feature flags for which the state changes
+    %% after the check above.
+    WereEnabled = RepairedEnabledFeatureNames -- EnabledFeatureNames,
+    WereDisabled = EnabledFeatureNames -- RepairedEnabledFeatureNames,
+    case {WereEnabled, WereDisabled} of
+        {[], []} -> ok;
+        _        -> rabbit_log_feature_flags:warning(
+                      "Feature flags: the previous instance of this node "
+                      "must have failed to write the `feature_flags` "
+                      "file at `~s`:",
+                      [enabled_feature_flags_list_file()])
+    end,
+    case WereEnabled of
+        [] -> ok;
+        _  -> rabbit_log_feature_flags:warning(
+                "Feature flags: - list of previously enabled "
+                "feature flags now marked as such: ~p", [WereEnabled])
+    end,
+    case WereDisabled of
+        [] -> ok;
+        _  -> rabbit_log_feature_flags:warning(
+                "Feature flags: - list of previously disabled "
+                "feature flags now marked as such: ~p", [WereDisabled])
+    end,
+    %% Finally, if the new list of enabled feature flags is different
+    %% than the one on disk, we write the new list and re-initialize the
+    %% registry.
+    case RepairedEnabledFeatureNames of
+        EnabledFeatureNames ->
+            ok;
+        _ ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: write the repaired list of enabled feature "
+              "flags"),
+            WrittenToDisk = ok =:= try_to_write_enabled_feature_flags_list(
+                                     RepairedEnabledFeatureNames),
+            initialize_registry(
+              #{},
+              list_of_enabled_feature_flags_to_feature_states(
+                RepairedEnabledFeatureNames),
+              WrittenToDisk)
+    end.
+
+-spec refresh_feature_flags_after_app_load([atom()]) ->
+    ok | {error, any()} | no_return().
+%% @doc Re-scans feature flags after the given applications were loaded:
+%% enables locally the flags the cluster already enabled but this node
+%% could not run before, and shares brand-new flags with the cluster.
+
+refresh_feature_flags_after_app_load([]) ->
+    ok;
+refresh_feature_flags_after_app_load(Apps) ->
+    rabbit_log_feature_flags:debug(
+      "Feature flags: new apps loaded: ~p -> refreshing feature flags",
+      [Apps]),
+
+    FeatureFlags0 = list(all),
+    FeatureFlags1 = query_supported_feature_flags(),
+
+    %% The following list contains all the feature flags this node
+    %% learned about only because remote nodes have them. Now, the
+    %% applications providing them are loaded locally as well.
+    %% Therefore, we may run their migration function in case the state
+    %% of this node needs it.
+    AlreadySupportedFeatureNames = maps:keys(
+                                     maps:filter(
+                                       fun(_, #{provided_by := App}) ->
+                                               lists:member(App, Apps)
+                                       end, FeatureFlags0)),
+    case AlreadySupportedFeatureNames of
+        [] ->
+            ok;
+        _ ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: new apps loaded: feature flags already "
+              "supported: ~p",
+              [lists:sort(AlreadySupportedFeatureNames)])
+    end,
+
+    %% The following list contains all the feature flags no nodes in the
+    %% cluster knew about before: this is the first time we see them in
+    %% this instance of the cluster. We need to register them on all
+    %% nodes.
+    NewSupportedFeatureFlags = maps:filter(
+                                 fun(FeatureName, _) ->
+                                         not maps:is_key(FeatureName,
+                                                         FeatureFlags0)
+                                 end, FeatureFlags1),
+    case maps:keys(NewSupportedFeatureFlags) of
+        [] ->
+            ok;
+        NewSupportedFeatureNames ->
+            rabbit_log_feature_flags:debug(
+              "Feature flags: new apps loaded: new feature flags (unseen so "
+              "far): ~p ",
+              [lists:sort(NewSupportedFeatureNames)])
+    end,
+
+    %% Re-initialize the registry so it covers the new applications,
+    %% then apply local enablement and cluster-wide sharing in order.
+    case initialize_registry() of
+        ok ->
+            Ret = maybe_enable_locally_after_app_load(
+                    AlreadySupportedFeatureNames),
+            case Ret of
+                ok ->
+                    share_new_feature_flags_after_app_load(
+                      NewSupportedFeatureFlags, ?TIMEOUT);
+                Error ->
+                    Error
+            end;
+        Error ->
+            Error
+    end.
+
+%% @private
+%% @doc For each feature flag of the list which is marked enabled
+%% cluster-wide, runs the local enablement procedure; stops at the
+%% first error. Flags not enabled cluster-wide are skipped.
+maybe_enable_locally_after_app_load([]) ->
+    ok;
+maybe_enable_locally_after_app_load([FeatureName | Rest]) ->
+    case is_enabled(FeatureName) of
+        false ->
+            maybe_enable_locally_after_app_load(Rest);
+        true ->
+            case do_enable_locally(FeatureName) of
+                ok    -> maybe_enable_locally_after_app_load(Rest);
+                Error -> Error
+            end
+    end.
+
+%% @private
+%% @doc Pushes freshly-discovered feature flags to the rest of the
+%% cluster, using the local node as the entry point.
+share_new_feature_flags_after_app_load(NewFeatureFlags, Timeout) ->
+    push_local_feature_flags_from_apps_unknown_remotely(
+      node(), NewFeatureFlags, Timeout).
+
+%% @private
+%% @doc Code server `-on_load' hook. Returning anything other than `ok'
+%% denies the module load; the returned string doubles as the denial
+%% reason reported by the code server.
+on_load() ->
+    %% The goal of this `on_load()` code server hook is to prevent this
+    %% module from being loaded in an already running RabbitMQ node if
+    %% the running version does not have the feature flags subsystem.
+    %%
+    %% This situation happens when an upgrade overwrites RabbitMQ files
+    %% with the node still running. This is the case with many packages:
+    %% files are updated on disk, then a post-install step takes care of
+    %% restarting the service.
+    %%
+    %% The problem is that if many nodes in a cluster are updated at the
+    %% same time, one node running the newer version might query feature
+    %% flags on an old node where this module is already available
+    %% (because files were already overwritten). This causes the query
+    %% to report an unexpected answer and the newer node to refuse to
+    %% start.
+    %%
+    %% However, when the module is executed outside of RabbitMQ (for
+    %% debugging purpose or in the context of EUnit for instance), we
+    %% want to allow the load. That's why we first check if RabbitMQ is
+    %% actually running.
+    case rabbit:is_running() of
+        true ->
+            %% RabbitMQ is running.
+            %%
+            %% Now we want to differentiate a pre-feature-flags node
+            %% from one having the subsystem.
+            %%
+            %% To do that, we verify if the `feature_flags_file`
+            %% application environment variable is defined. With a
+            %% feature-flags-enabled node, this application environment
+            %% variable is defined by rabbitmq-server(8).
+            case application:get_env(rabbit, feature_flags_file) of
+                {ok, _} ->
+                    %% This is a feature-flags-enabled version. Loading
+                    %% the module is permitted.
+                    ok;
+                _ ->
+                    %% This is a pre-feature-flags version. We deny the
+                    %% load and report why, possibly specifying the
+                    %% version of RabbitMQ.
+                    Vsn = case application:get_key(rabbit, vsn) of
+                              {ok, V}   -> V;
+                              undefined -> "unknown version"
+                          end,
+                    %% Non-`ok' return value: the load is refused.
+                    "Refusing to load '" ?MODULE_STRING "' on this "
+                    "node. It appears to be running a pre-feature-flags "
+                    "version of RabbitMQ (" ++ Vsn ++ "). This is fine: "
+                    "a newer version of RabbitMQ was deployed on this "
+                    "node, but it was not restarted yet. This warning "
+                    "is probably caused by a remote node querying this "
+                    "node for its feature flags."
+            end;
+        false ->
+            %% RabbitMQ is not running. Loading the module is permitted
+            %% because this Erlang node will never be queried for its
+            %% feature flags.
+            ok
+    end.
diff --git a/deps/rabbit/src/rabbit_ff_extra.erl b/deps/rabbit/src/rabbit_ff_extra.erl
new file mode 100644
index 0000000000..f0728d491e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_ff_extra.erl
@@ -0,0 +1,244 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% @copyright 2018-2020 VMware, Inc. or its affiliates.
+%%
+%% @doc
+%% This module provides extra functions unused by the feature flags
+%% subsystem core functionality.
+
+-module(rabbit_ff_extra).
+
+-include_lib("stdout_formatter/include/stdout_formatter.hrl").
+
+-export([cli_info/0,
+ info/1,
+ info/2,
+ format_error/1]).
+
+-type cli_info() :: [cli_info_entry()].
+%% A list of feature flags properties, formatted for the RabbitMQ CLI.
+
+-type cli_info_entry() :: [{name, rabbit_feature_flags:feature_name()} |
+ {state, enabled | disabled | unavailable} |
+ {stability, rabbit_feature_flags:stability()} |
+ {provided_by, atom()} |
+ {desc, string()} |
+ {doc_url, string()}].
+%% A list of properties for a single feature flag, formatted for the
+%% RabbitMQ CLI.
+
+-type info_options() :: #{colors => boolean(),
+ lines => boolean(),
+ verbose => non_neg_integer()}.
+%% Options accepted by {@link info/1} and {@link info/2}.
+
+-export_type([info_options/0]).
+
+-spec cli_info() -> cli_info().
+%% @doc
+%% Returns a list of all feature flags properties.
+%%
+%% Convenience wrapper around {@link cli_info/1} for the full set of
+%% feature flags.
+%%
+%% @returns the list of all feature flags properties.
+
+cli_info() ->
+    cli_info(rabbit_feature_flags:list(all)).
+
+-spec cli_info(rabbit_feature_flags:feature_flags()) -> cli_info().
+%% @doc
+%% Formats a map of feature flags and their properties into a list of
+%% feature flags properties as expected by the RabbitMQ CLI.
+%%
+%% The result is sorted by feature flag name; `desc' and `doc_url'
+%% default to the empty string and are converted to binaries.
+%%
+%% @param FeatureFlags A map of feature flags.
+%% @returns the list of feature flags properties, created from the map
+%%   specified in arguments.
+
+cli_info(FeatureFlags) ->
+    [begin
+         FeatureProps = maps:get(FeatureName, FeatureFlags),
+         Desc = maps:get(desc, FeatureProps, ""),
+         DocUrl = maps:get(doc_url, FeatureProps, ""),
+         [{name, FeatureName},
+          {desc, unicode:characters_to_binary(Desc)},
+          {doc_url, unicode:characters_to_binary(DocUrl)},
+          {state, rabbit_feature_flags:get_state(FeatureName)},
+          {stability, rabbit_feature_flags:get_stability(FeatureProps)},
+          {provided_by, maps:get(provided_by, FeatureProps)}]
+     end || FeatureName <- lists:sort(maps:keys(FeatureFlags))].
+
+-spec info(info_options()) -> ok.
+%% @doc
+%% Displays an array of all supported feature flags and their properties
+%% on `stdout'.
+%%
+%% Stable and experimental feature flags are rendered as two separate
+%% tables; the state legend is printed only if at least one flag exists.
+%%
+%% @param Options Options to tune what is displayed and how.
+
+info(Options) ->
+    %% Two tables: one for stable feature flags, one for experimental ones.
+    StableFF = rabbit_feature_flags:list(all, stable),
+    case maps:size(StableFF) of
+        0 ->
+            ok;
+        _ ->
+            stdout_formatter:display(
+              #paragraph{content = "\n## Stable feature flags:",
+                         props = #{bold => true}}),
+            info(StableFF, Options)
+    end,
+    ExpFF = rabbit_feature_flags:list(all, experimental),
+    case maps:size(ExpFF) of
+        0 ->
+            ok;
+        _ ->
+            stdout_formatter:display(
+              #paragraph{content = "\n## Experimental feature flags:",
+                         props = #{bold => true}}),
+            info(ExpFF, Options)
+    end,
+    case maps:size(StableFF) + maps:size(ExpFF) of
+        0 -> ok;
+        _ -> state_legend(Options)
+    end.
+
+-spec info(rabbit_feature_flags:feature_flags(), info_options()) -> ok.
+%% @doc
+%% Displays an array of feature flags and their properties on `stdout',
+%% based on the specified feature flags map.
+%%
+%% Recognized options: `colors' (ANSI colors, default `true'), `lines'
+%% (ANSI line drawing, default `true') and `verbose' (> 0 adds a
+%% per-node support column; this queries every cluster node).
+%%
+%% @param FeatureFlags Map of the feature flags to display.
+%% @param Options Options to tune what is displayed and how.
+
+info(FeatureFlags, Options) ->
+    Verbose = maps:get(verbose, Options, 0),
+    UseColors = use_colors(Options),
+    UseLines = use_lines(Options),
+    %% Style attribute maps are empty when colors are disabled so the
+    %% same record-building code works for both modes.
+    Title = case UseColors of
+                true  -> #{title => true};
+                false -> #{}
+            end,
+    Bold = case UseColors of
+               true  -> #{bold => true};
+               false -> #{}
+           end,
+    {Green, Yellow, Red} = case UseColors of
+                               true ->
+                                   {#{fg => green},
+                                    #{fg => yellow},
+                                    #{bold => true,
+                                      bg => red}};
+                               false ->
+                                   {#{}, #{}, #{}}
+                           end,
+    Border = case UseLines of
+                 true  -> #{border_drawing => ansi};
+                 false -> #{border_drawing => ascii}
+             end,
+    %% Table columns:
+    %% | Name | State | Provided by | Description
+    %%
+    %% where:
+    %%     State = Enabled | Disabled | Unavailable (if a node doesn't
+    %%     support it).
+    TableHeader = #row{cells = ["Name",
+                                "State",
+                                "Provided",
+                                "Description"],
+                       props = Title},
+    Nodes = lists:sort([node() | rabbit_feature_flags:remote_nodes()]),
+    Rows = lists:map(
+             fun(FeatureName) ->
+                     FeatureProps = maps:get(FeatureName, FeatureFlags),
+                     State0 = rabbit_feature_flags:get_state(FeatureName),
+                     {State, Color} = case State0 of
+                                          enabled ->
+                                              {"Enabled", Green};
+                                          disabled ->
+                                              {"Disabled", Yellow};
+                                          unavailable ->
+                                              {"Unavailable", Red}
+                                      end,
+                     App = maps:get(provided_by, FeatureProps),
+                     Desc = maps:get(desc, FeatureProps, ""),
+                     %% Renders one "Node: supported/unsupported" line;
+                     %% only evaluated in verbose mode.
+                     VFun = fun(Node) ->
+                                    Supported =
+                                    rabbit_feature_flags:does_node_support(
+                                      Node, [FeatureName], 60000),
+                                    {Label, LabelColor} =
+                                    case Supported of
+                                        true  -> {"supported", #{}};
+                                        false -> {"unsupported", Red}
+                                    end,
+                                    #paragraph{content =
+                                               [rabbit_misc:format(" ~s: ",
+                                                                   [Node]),
+                                                #paragraph{content = Label,
+                                                           props = LabelColor}]}
+                            end,
+                     ExtraLines = if
+                                      Verbose > 0 ->
+                                          NodesList = lists:join(
+                                                        "\n",
+                                                        lists:map(
+                                                          VFun, Nodes)),
+                                          ["\n\n",
+                                           "Per-node support level:\n"
+                                           | NodesList];
+                                      true ->
+                                          []
+                                  end,
+                     [#paragraph{content = FeatureName,
+                                 props = Bold},
+                      #paragraph{content = State,
+                                 props = Color},
+                      #paragraph{content = App},
+                      #paragraph{content = [Desc | ExtraLines]}]
+             end, lists:sort(maps:keys(FeatureFlags))),
+    io:format("~n", []),
+    stdout_formatter:display(#table{rows = [TableHeader | Rows],
+                                    props = Border#{cell_padding => {0, 1}}}).
+
+%% Whether ANSI colors should be used; enabled unless turned off.
+use_colors(Options) ->
+    case Options of
+        #{colors := UseColors} -> UseColors;
+        _                      -> true
+    end.
+
+%% Whether ANSI line drawing should be used; enabled unless turned off.
+use_lines(Options) ->
+    case Options of
+        #{lines := UseLines} -> UseLines;
+        _                    -> true
+    end.
+
+%% @private
+%% @doc Prints the legend explaining the three possible feature flag
+%% states, colorized to match the table produced by info/2.
+state_legend(Options) ->
+    UseColors = use_colors(Options),
+    {Green, Yellow, Red} = case UseColors of
+                               true ->
+                                   {#{fg => green},
+                                    #{fg => yellow},
+                                    #{bold => true,
+                                      bg => red}};
+                               false ->
+                                   {#{}, #{}, #{}}
+                           end,
+    Enabled = #paragraph{content = "Enabled", props = Green},
+    Disabled = #paragraph{content = "Disabled", props = Yellow},
+    Unavailable = #paragraph{content = "Unavailable", props = Red},
+    stdout_formatter:display(
+      #paragraph{
+         content =
+         ["\n",
+          "Possible states:\n",
+          "      ", Enabled, ": The feature flag is enabled on all nodes\n",
+          "     ", Disabled, ": The feature flag is disabled on all nodes\n",
+          "  ", Unavailable, ": The feature flag cannot be enabled because"
+          " one or more nodes do not support it\n"]}).
+
+-spec format_error(any()) -> string().
+%% @doc
+%% Formats the error reason term so it can be presented to human beings.
+%%
+%% The reason is simply pretty-printed with the `~p' format control.
+%%
+%% @param Reason The term in the `{error, Reason}' tuple.
+%% @returns the formatted error reason.
+
+format_error(Reason) ->
+    rabbit_misc:format("~p", [Reason]).
diff --git a/deps/rabbit/src/rabbit_ff_registry.erl b/deps/rabbit/src/rabbit_ff_registry.erl
new file mode 100644
index 0000000000..372971f949
--- /dev/null
+++ b/deps/rabbit/src/rabbit_ff_registry.erl
@@ -0,0 +1,189 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% @author The RabbitMQ team
+%% @copyright 2018-2020 VMware, Inc. or its affiliates.
+%%
+%% @doc
+%% This module exposes the API of the {@link rabbit_feature_flags}
+%% registry. The feature flags registry is an Erlang module, compiled at
+%% runtime, storing all the informations about feature flags: which are
+%% supported, which are enabled, etc.
+%%
+%% Because it is compiled at runtime, the initial source code is mostly
+%% an API reference. What the initial module does is merely ask {@link
+%% rabbit_feature_flags} to generate the real registry.
+
+-module(rabbit_ff_registry).
+
+-export([get/1,
+ list/1,
+ states/0,
+ is_supported/1,
+ is_enabled/1,
+ is_registry_initialized/0,
+ is_registry_written_to_disk/0]).
+
+-ifdef(TEST).
+-on_load(on_load/0).
+-endif.
+
+-spec get(rabbit_feature_flags:feature_name()) ->
+    rabbit_feature_flags:feature_props() | undefined.
+%% @doc
+%% Returns the properties of a feature flag.
+%%
+%% Only the informations stored in the local registry is used to answer
+%% this call.
+%%
+%% This stub triggers the generation of the real registry module, then
+%% calls the freshly-loaded replacement of itself.
+%%
+%% @param FeatureName The name of the feature flag.
+%% @returns the properties of the specified feature flag.
+
+get(FeatureName) ->
+    rabbit_feature_flags:initialize_registry(),
+    %% Initially, is_registry_initialized/0 always returns `false`
+    %% and this ?MODULE:get(FeatureName) is always called. The case
+    %% statement is here to please Dialyzer.
+    %%
+    %% The external `?MODULE:' call resolves against the NEW module
+    %% version compiled by initialize_registry/0, not this stub.
+    case is_registry_initialized() of
+        false -> ?MODULE:get(FeatureName);
+        true  -> undefined
+    end.
+
+-spec list(all | enabled | disabled) -> rabbit_feature_flags:feature_flags().
+%% @doc
+%% Lists all, enabled or disabled feature flags, depending on the argument.
+%%
+%% Only the informations stored in the local registry is used to answer
+%% this call.
+%%
+%% @param Which The group of feature flags to return: `all', `enabled' or
+%%   `disabled'.
+%% @returns A map of selected feature flags.
+
+list(Which) ->
+    rabbit_feature_flags:initialize_registry(),
+    %% See get/1 for an explanation of the case statement below.
+    case is_registry_initialized() of
+        false -> ?MODULE:list(Which);
+        true  -> #{}
+    end.
+
+-spec states() -> rabbit_feature_flags:feature_states().
+%% @doc
+%% Returns the states of supported feature flags.
+%%
+%% Only the informations stored in the local registry is used to answer
+%% this call.
+%%
+%% @returns A map of feature flag states.
+
+states() ->
+    rabbit_feature_flags:initialize_registry(),
+    %% See get/1 for an explanation of the case statement below.
+    case is_registry_initialized() of
+        false -> ?MODULE:states();
+        true  -> #{}
+    end.
+
+-spec is_supported(rabbit_feature_flags:feature_name()) -> boolean().
+%% @doc
+%% Returns if a feature flag is supported.
+%%
+%% Only the informations stored in the local registry is used to answer
+%% this call.
+%%
+%% @param FeatureName The name of the feature flag to be checked.
+%% @returns `true' if the feature flag is supported, or `false'
+%%   otherwise.
+
+is_supported(FeatureName) ->
+    rabbit_feature_flags:initialize_registry(),
+    %% See get/1 for an explanation of the case statement below.
+    case is_registry_initialized() of
+        false -> ?MODULE:is_supported(FeatureName);
+        true  -> false
+    end.
+
+-spec is_enabled(rabbit_feature_flags:feature_name()) -> boolean() | state_changing.
+%% @doc
+%% Returns if a feature flag is enabled or if its state is changing.
+%%
+%% Only the informations stored in the local registry is used to answer
+%% this call.
+%%
+%% @param FeatureName The name of the feature flag to be checked.
+%% @returns `true' if the feature flag is enabled, `state_changing' if
+%%   its state is transient, or `false' otherwise.
+
+is_enabled(FeatureName) ->
+    rabbit_feature_flags:initialize_registry(),
+    %% See get/1 for an explanation of the case statement below.
+    case is_registry_initialized() of
+        false -> ?MODULE:is_enabled(FeatureName);
+        true  -> false
+    end.
+
+-spec is_registry_initialized() -> boolean().
+%% @doc
+%% Indicates if the registry is initialized.
+%%
+%% The registry is considered initialized once the initial Erlang module
+%% was replaced by the copy compiled at runtime.
+%%
+%% @returns `true' when the module is the one compiled at runtime,
+%%   `false' when the module is the initial one compiled from RabbitMQ
+%%   source code.
+
+is_registry_initialized() ->
+    %% In this stub copy, the answer is always `false'; see
+    %% always_return_false/0 for why a helper is used instead of a
+    %% literal.
+    always_return_false().
+
+-spec is_registry_written_to_disk() -> boolean().
+%% @doc
+%% Indicates if the feature flags state was successfully persisted to disk.
+%%
+%% Note that on startup, {@link rabbit_feature_flags} tries to determine
+%% the state of each supported feature flag, regardless of the
+%% information on disk, to ensure maximum consistency. However, this can
+%% be done for feature flags supporting it only.
+%%
+%% @returns `true' if the state was successfully written to disk and
+%%   the registry can be initialized from that during the next RabbitMQ
+%%   startup, `false' if the write failed and the node might loose feature
+%%   flags state on restart.
+
+is_registry_written_to_disk() ->
+    %% In this stub copy, the answer is always `true'; see
+    %% always_return_true/0 for why a helper is used instead of a
+    %% literal.
+    always_return_true().
+
+%% @private
+%% @doc Returns `true' in a way Dialyzer cannot prove constant.
+always_return_true() ->
+    %% This function is here to trick Dialyzer. We want some functions
+    %% in this initial on-disk registry to always return `true` or
+    %% `false`. However the generated registry will return actual
+    %% booleans. The `-spec()` correctly advertises a return type of
+    %% `boolean()`. But in the meantime, Dialyzer only knows about this
+    %% copy which, without the trick below, would always return either
+    %% `true` (e.g. in is_registry_written_to_disk/0) or `false` (e.g.
+    %% is_registry_initialized/0). This obviously causes some warnings
+    %% where the registry functions are used: Dialyzer believes that
+    %% e.g. matching the return value of is_registry_initialized/0
+    %% against `true` will never succeed.
+    %%
+    %% That's why this function makes a call which we know the result,
+    %% but not Dialyzer, to "create" that hard-coded `true` return
+    %% value.
+    %%
+    %% The key is never written, so the lookup always yields `undefined'.
+    erlang:get({?MODULE, always_undefined}) =:= undefined.
+
+%% @private
+%% @doc Returns `false' in a way Dialyzer cannot prove constant; see
+%% always_return_true/0 for the rationale.
+always_return_false() ->
+    not always_return_true().
+
+-ifdef(TEST).
+%% Test-only load hook: logs when the stub registry module is loaded,
+%% swallowing any error in case the logger is not available yet.
+on_load() ->
+    _ = (catch rabbit_log_feature_flags:debug(
+                 "Feature flags: Loading initial (uninitialized) registry "
+                 "module (~p)",
+                 [self()])),
+    ok.
+-endif.
diff --git a/deps/rabbit/src/rabbit_fhc_helpers.erl b/deps/rabbit/src/rabbit_fhc_helpers.erl
new file mode 100644
index 0000000000..d310e84008
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fhc_helpers.erl
@@ -0,0 +1,45 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_fhc_helpers).
+
+-export([clear_read_cache/0]).
+
+-include("amqqueue.hrl").
+
+%% Clears the file handle cache read buffers, both the global one and
+%% the per-queue ones, but only when read buffering is enabled.
+clear_read_cache() ->
+    ReadBufferingEnabled =
+        application:get_env(rabbit, fhc_read_buffering) =:= {ok, true},
+    case ReadBufferingEnabled of
+        true ->
+            file_handle_cache:clear_read_cache(),
+            clear_vhost_read_cache(rabbit_vhost:list_names());
+        false ->
+            %% `undefined' or `{ok, false}': nothing to clear.
+            ok
+    end.
+
+%% Clears the per-queue read cache of every queue of each given vhost.
+clear_vhost_read_cache(VHosts) ->
+    lists:foreach(
+      fun(VHost) ->
+              clear_queue_read_cache(rabbit_amqqueue:list(VHost))
+      end, VHosts).
+
+%% Asks the master and mirror processes of each queue which live on the
+%% local node to drop their process-dictionary read cache.
+clear_queue_read_cache([]) ->
+    ok;
+clear_queue_read_cache([Q | Rest]) when ?is_amqqueue(Q) ->
+    MPid = amqqueue:get_pid(Q),
+    SPids = amqqueue:get_slave_pids(Q),
+    %% Limit the action to the current node.
+    Pids = [P || P <- [MPid | SPids], node(P) =:= node()],
+    %% This function is executed in the context of the backing queue
+    %% process because the read buffer is stored in the process
+    %% dictionary.
+    Fun = fun(_, State) ->
+                  _ = file_handle_cache:clear_process_read_cache(),
+                  State
+          end,
+    [rabbit_amqqueue:run_backing_queue(Pid, rabbit_variable_queue, Fun)
+     || Pid <- Pids],
+    clear_queue_read_cache(Rest).
diff --git a/deps/rabbit/src/rabbit_fifo.erl b/deps/rabbit/src/rabbit_fifo.erl
new file mode 100644
index 0000000000..51acfffd0d
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fifo.erl
@@ -0,0 +1,2124 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_fifo).
+
+-behaviour(ra_machine).
+
+-compile(inline_list_funcs).
+-compile(inline).
+-compile({no_auto_import, [apply/3]}).
+
+-include("rabbit_fifo.hrl").
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([
+ init/1,
+ apply/3,
+ state_enter/2,
+ tick/2,
+ overview/1,
+ get_checked_out/4,
+ %% versioning
+ version/0,
+ which_module/1,
+ %% aux
+ init_aux/1,
+ handle_aux/6,
+ % queries
+ query_messages_ready/1,
+ query_messages_checked_out/1,
+ query_messages_total/1,
+ query_processes/1,
+ query_ra_indexes/1,
+ query_consumer_count/1,
+ query_consumers/1,
+ query_stat/1,
+ query_single_active_consumer/1,
+ query_in_memory_usage/1,
+ query_peek/2,
+ usage/1,
+
+ zero/1,
+
+ %% misc
+ dehydrate_state/1,
+ normalize/1,
+
+ %% protocol helpers
+ make_enqueue/3,
+ make_register_enqueuer/1,
+ make_checkout/3,
+ make_settle/2,
+ make_return/2,
+ make_discard/2,
+ make_credit/4,
+ make_purge/0,
+ make_purge_nodes/1,
+ make_update_config/1,
+ make_garbage_collection/0
+ ]).
+
+%% command records representing all the protocol actions that are supported
+-record(enqueue, {pid :: option(pid()),
+ seq :: option(msg_seqno()),
+ msg :: raw_msg()}).
+-record(register_enqueuer, {pid :: pid()}).
+-record(checkout, {consumer_id :: consumer_id(),
+ spec :: checkout_spec(),
+ meta :: consumer_meta()}).
+-record(settle, {consumer_id :: consumer_id(),
+ msg_ids :: [msg_id()]}).
+-record(return, {consumer_id :: consumer_id(),
+ msg_ids :: [msg_id()]}).
+-record(discard, {consumer_id :: consumer_id(),
+ msg_ids :: [msg_id()]}).
+-record(credit, {consumer_id :: consumer_id(),
+ credit :: non_neg_integer(),
+ delivery_count :: non_neg_integer(),
+ drain :: boolean()}).
+-record(purge, {}).
+-record(purge_nodes, {nodes :: [node()]}).
+-record(update_config, {config :: config()}).
+-record(garbage_collection, {}).
+
+-opaque protocol() ::
+ #enqueue{} |
+ #register_enqueuer{} |
+ #checkout{} |
+ #settle{} |
+ #return{} |
+ #discard{} |
+ #credit{} |
+ #purge{} |
+ #purge_nodes{} |
+ #update_config{} |
+ #garbage_collection{}.
+
+-type command() :: protocol() | ra_machine:builtin_command().
+%% all the command types supported by ra fifo
+
+-type client_msg() :: delivery().
+%% the messages `rabbit_fifo' can send to consumers.
+
+-opaque state() :: #?MODULE{}.
+
+-export_type([protocol/0,
+ delivery/0,
+ command/0,
+ credit_mode/0,
+ consumer_tag/0,
+ consumer_meta/0,
+ consumer_id/0,
+ client_msg/0,
+ msg/0,
+ msg_id/0,
+ msg_seqno/0,
+ delivery_msg/0,
+ state/0,
+ config/0]).
+
+%% ra_machine callback: build the initial machine state from the queue
+%% configuration map. 'name' and 'queue_resource' are mandatory; all other
+%% settings are applied (with defaults) by update_config/2.
+-spec init(config()) -> state().
+init(#{name := Name,
+ queue_resource := Resource} = Conf) ->
+ update_config(Conf, #?MODULE{cfg = #cfg{name = Name,
+ resource = Resource}}).
+
+%% Apply a (possibly partial) configuration map onto an existing state,
+%% filling in defaults for absent keys. Used both at init and when an
+%% #update_config{} command is applied at runtime.
+update_config(Conf, State) ->
+ DLH = maps:get(dead_letter_handler, Conf, undefined),
+ BLH = maps:get(become_leader_handler, Conf, undefined),
+ RCI = maps:get(release_cursor_interval, Conf, ?RELEASE_CURSOR_EVERY),
+ Overflow = maps:get(overflow_strategy, Conf, drop_head),
+ MaxLength = maps:get(max_length, Conf, undefined),
+ MaxBytes = maps:get(max_bytes, Conf, undefined),
+ MaxMemoryLength = maps:get(max_in_memory_length, Conf, undefined),
+ MaxMemoryBytes = maps:get(max_in_memory_bytes, Conf, undefined),
+ DeliveryLimit = maps:get(delivery_limit, Conf, undefined),
+ Expires = maps:get(expires, Conf, undefined),
+ ConsumerStrategy = case maps:get(single_active_consumer_on, Conf, false) of
+ true ->
+ single_active;
+ false ->
+ competing
+ end,
+ Cfg = State#?MODULE.cfg,
+ %% the release cursor interval is stored as a {Base, Current} pair
+ RCISpec = {RCI, RCI},
+
+ LastActive = maps:get(created, Conf, undefined),
+ State#?MODULE{cfg = Cfg#cfg{release_cursor_interval = RCISpec,
+ dead_letter_handler = DLH,
+ become_leader_handler = BLH,
+ overflow_strategy = Overflow,
+ max_length = MaxLength,
+ max_bytes = MaxBytes,
+ max_in_memory_length = MaxMemoryLength,
+ max_in_memory_bytes = MaxMemoryBytes,
+ consumer_strategy = ConsumerStrategy,
+ delivery_limit = DeliveryLimit,
+ expires = Expires},
+ last_active = LastActive}.
+
+%% Constant-zero function, usable wherever a query fun is required but the
+%% computed value is irrelevant.
+zero(_) ->
+ 0.
+
+% msg_ids are scoped per consumer
+% ra_indexes holds all raft indexes for enqueues currently on queue
+%% ra_machine callback: apply one replicated command (protocol record or
+%% built-in ra command such as {down, ...}) to the state, returning the new
+%% state, a reply for the caller and optionally a list of side effects.
+-spec apply(ra_machine:command_meta_data(), command(), state()) ->
+ {state(), Reply :: term(), ra_machine:effects()} |
+ {state(), Reply :: term()}.
+apply(Meta, #enqueue{pid = From, seq = Seq,
+ msg = RawMsg}, State00) ->
+ apply_enqueue(Meta, From, Seq, RawMsg, State00);
+apply(_Meta, #register_enqueuer{pid = Pid},
+ #?MODULE{enqueuers = Enqueuers0,
+ cfg = #cfg{overflow_strategy = Overflow}} = State0) ->
+
+ State = case maps:is_key(Pid, Enqueuers0) of
+ true ->
+ %% if the enqueuer exits just echo the overflow state
+ State0;
+ false ->
+ State0#?MODULE{enqueuers = Enqueuers0#{Pid => #enqueuer{}}}
+ end,
+ Res = case is_over_limit(State) of
+ true when Overflow == reject_publish ->
+ reject_publish;
+ _ ->
+ ok
+ end,
+ {State, Res, [{monitor, process, Pid}]};
+apply(Meta,
+ #settle{msg_ids = MsgIds, consumer_id = ConsumerId},
+ #?MODULE{consumers = Cons0} = State) ->
+ case Cons0 of
+ #{ConsumerId := Con0} ->
+ % need to increment metrics before completing as any snapshot
+ % states taken need to include them
+ complete_and_checkout(Meta, MsgIds, ConsumerId,
+ Con0, [], State);
+ _ ->
+ {State, ok}
+
+ end;
+apply(Meta, #discard{msg_ids = MsgIds, consumer_id = ConsumerId},
+ #?MODULE{consumers = Cons0} = State0) ->
+ case Cons0 of
+ #{ConsumerId := Con0} ->
+ Discarded = maps:with(MsgIds, Con0#consumer.checked_out),
+ Effects = dead_letter_effects(rejected, Discarded, State0, []),
+ complete_and_checkout(Meta, MsgIds, ConsumerId, Con0,
+ Effects, State0);
+ _ ->
+ {State0, ok}
+ end;
+apply(Meta, #return{msg_ids = MsgIds, consumer_id = ConsumerId},
+ #?MODULE{consumers = Cons0} = State) ->
+ case Cons0 of
+ #{ConsumerId := #consumer{checked_out = Checked0}} ->
+ Returned = maps:with(MsgIds, Checked0),
+ return(Meta, ConsumerId, Returned, [], State);
+ _ ->
+ {State, ok}
+ end;
+apply(Meta, #credit{credit = NewCredit, delivery_count = RemoteDelCnt,
+ drain = Drain, consumer_id = ConsumerId},
+ #?MODULE{consumers = Cons0,
+ service_queue = ServiceQueue0,
+ waiting_consumers = Waiting0} = State0) ->
+ case Cons0 of
+ #{ConsumerId := #consumer{delivery_count = DelCnt} = Con0} ->
+ %% this can go below 0 when credit is reduced
+ C = max(0, RemoteDelCnt + NewCredit - DelCnt),
+ %% grant the credit
+ Con1 = Con0#consumer{credit = C},
+ ServiceQueue = maybe_queue_consumer(ConsumerId, Con1,
+ ServiceQueue0),
+ Cons = maps:put(ConsumerId, Con1, Cons0),
+ {State1, ok, Effects} =
+ checkout(Meta, State0,
+ State0#?MODULE{service_queue = ServiceQueue,
+ consumers = Cons}, []),
+ Response = {send_credit_reply, messages_ready(State1)},
+ %% by this point all checkouts for the updated credit value
+ %% should be processed so we can evaluate the drain
+ case Drain of
+ false ->
+ %% just return the result of the checkout
+ {State1, Response, Effects};
+ true ->
+ Con = #consumer{credit = PostCred} =
+ maps:get(ConsumerId, State1#?MODULE.consumers),
+ %% add the outstanding credit to the delivery count
+ DeliveryCount = Con#consumer.delivery_count + PostCred,
+ Consumers = maps:put(ConsumerId,
+ Con#consumer{delivery_count = DeliveryCount,
+ credit = 0},
+ State1#?MODULE.consumers),
+ Drained = Con#consumer.credit,
+ {CTag, _} = ConsumerId,
+ {State1#?MODULE{consumers = Consumers},
+ %% returning a multi response with two client actions
+ %% for the channel to execute
+ {multi, [Response, {send_drained, {CTag, Drained}}]},
+ Effects}
+ end;
+ _ when Waiting0 /= [] ->
+ %% there are waiting consumers
+ case lists:keytake(ConsumerId, 1, Waiting0) of
+ {value, {_, Con0 = #consumer{delivery_count = DelCnt}}, Waiting} ->
+ %% the consumer is a waiting one
+ %% grant the credit
+ C = max(0, RemoteDelCnt + NewCredit - DelCnt),
+ Con = Con0#consumer{credit = C},
+ State = State0#?MODULE{waiting_consumers =
+ [{ConsumerId, Con} | Waiting]},
+ {State, {send_credit_reply, messages_ready(State)}};
+ false ->
+ {State0, ok}
+ end;
+ _ ->
+ %% credit for unknown consumer - just ignore
+ {State0, ok}
+ end;
+apply(_, #checkout{spec = {dequeue, _}},
+ #?MODULE{cfg = #cfg{consumer_strategy = single_active}} = State0) ->
+ {State0, {error, {unsupported, single_active_consumer}}};
+apply(#{index := Index,
+ system_time := Ts,
+ from := From} = Meta, #checkout{spec = {dequeue, Settlement},
+ meta = ConsumerMeta,
+ consumer_id = ConsumerId},
+ #?MODULE{consumers = Consumers} = State00) ->
+ %% dequeue always updates last_active
+ State0 = State00#?MODULE{last_active = Ts},
+ %% all dequeue operations result in keeping the queue from expiring
+ Exists = maps:is_key(ConsumerId, Consumers),
+ case messages_ready(State0) of
+ 0 ->
+ {State0, {dequeue, empty}};
+ _ when Exists ->
+ %% a dequeue using the same consumer_id isn't possible at this point
+ {State0, {dequeue, empty}};
+ Ready ->
+ State1 = update_consumer(ConsumerId, ConsumerMeta,
+ {once, 1, simple_prefetch}, 0,
+ State0),
+ {success, _, MsgId, Msg, State2} = checkout_one(Meta, State1),
+ {State4, Effects1} = case Settlement of
+ unsettled ->
+ {_, Pid} = ConsumerId,
+ {State2, [{monitor, process, Pid}]};
+ settled ->
+ %% immediately settle the checkout
+ {State3, _, Effects0} =
+ apply(Meta, make_settle(ConsumerId, [MsgId]),
+ State2),
+ {State3, Effects0}
+ end,
+ {Reply, Effects2} =
+ case Msg of
+ {RaftIdx, {Header, empty}} ->
+ %% TODO add here new log effect with reply
+ {'$ra_no_reply',
+ [reply_log_effect(RaftIdx, MsgId, Header, Ready - 1, From) |
+ Effects1]};
+ _ ->
+ {{dequeue, {MsgId, Msg}, Ready-1}, Effects1}
+
+ end,
+
+ case evaluate_limit(Index, false, State0, State4, Effects2) of
+ {State, true, Effects} ->
+ update_smallest_raft_index(Index, Reply, State, Effects);
+ {State, false, Effects} ->
+ {State, Reply, Effects}
+ end
+ end;
+apply(Meta, #checkout{spec = cancel, consumer_id = ConsumerId}, State0) ->
+ {State, Effects} = cancel_consumer(Meta, ConsumerId, State0, [],
+ consumer_cancel),
+ checkout(Meta, State0, State, Effects);
+apply(Meta, #checkout{spec = Spec, meta = ConsumerMeta,
+ consumer_id = {_, Pid} = ConsumerId},
+ State0) ->
+ Priority = get_priority_from_args(ConsumerMeta),
+ State1 = update_consumer(ConsumerId, ConsumerMeta, Spec, Priority, State0),
+ checkout(Meta, State0, State1, [{monitor, process, Pid}]);
+apply(#{index := Index}, #purge{},
+ #?MODULE{ra_indexes = Indexes0,
+ returns = Returns,
+ messages = Messages} = State0) ->
+ Total = messages_ready(State0),
+ Indexes1 = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes0,
+ [I || {_, {I, _}} <- lqueue:to_list(Messages)]),
+ Indexes = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes1,
+ [I || {_, {I, _}} <- lqueue:to_list(Returns)]),
+
+ State1 = State0#?MODULE{ra_indexes = Indexes,
+ messages = lqueue:new(),
+ returns = lqueue:new(),
+ msg_bytes_enqueue = 0,
+ prefix_msgs = {0, [], 0, []},
+ msg_bytes_in_memory = 0,
+ msgs_ready_in_memory = 0},
+ Effects0 = [garbage_collection],
+ Reply = {purge, Total},
+ {State, _, Effects} = evaluate_limit(Index, false, State0,
+ State1, Effects0),
+ update_smallest_raft_index(Index, Reply, State, Effects);
+apply(_Meta, #garbage_collection{}, State) ->
+ {State, ok, [{aux, garbage_collection}]};
+apply(#{system_time := Ts} = Meta, {down, Pid, noconnection},
+ #?MODULE{consumers = Cons0,
+ cfg = #cfg{consumer_strategy = single_active},
+ waiting_consumers = Waiting0,
+ enqueuers = Enqs0} = State0) ->
+ Node = node(Pid),
+ %% if the pid refers to an active or cancelled consumer,
+ %% mark it as suspected and return it to the waiting queue
+ {State1, Effects0} =
+ maps:fold(fun({_, P} = Cid, C0, {S0, E0})
+ when node(P) =:= Node ->
+ %% the consumer should be returned to waiting
+ %% and checked out messages should be returned
+ Effs = consumer_update_active_effects(
+ S0, Cid, C0, false, suspected_down, E0),
+ Checked = C0#consumer.checked_out,
+ Credit = increase_credit(C0, maps:size(Checked)),
+ {St, Effs1} = return_all(Meta, S0, Effs,
+ Cid, C0#consumer{credit = Credit}),
+ %% if the consumer was cancelled there is a chance it got
+ %% removed when returning hence we need to be defensive here
+ Waiting = case St#?MODULE.consumers of
+ #{Cid := C} ->
+ Waiting0 ++ [{Cid, C}];
+ _ ->
+ Waiting0
+ end,
+ {St#?MODULE{consumers = maps:remove(Cid, St#?MODULE.consumers),
+ waiting_consumers = Waiting,
+ last_active = Ts},
+ Effs1};
+ (_, _, S) ->
+ S
+ end, {State0, []}, Cons0),
+ WaitingConsumers = update_waiting_consumer_status(Node, State1,
+ suspected_down),
+
+ %% select a new consumer from the waiting queue and run a checkout
+ State2 = State1#?MODULE{waiting_consumers = WaitingConsumers},
+ {State, Effects1} = activate_next_consumer(State2, Effects0),
+
+ %% mark any enqueuers as suspected
+ Enqs = maps:map(fun(P, E) when node(P) =:= Node ->
+ E#enqueuer{status = suspected_down};
+ (_, E) -> E
+ end, Enqs0),
+ Effects = [{monitor, node, Node} | Effects1],
+ checkout(Meta, State0, State#?MODULE{enqueuers = Enqs}, Effects);
+apply(#{system_time := Ts} = Meta, {down, Pid, noconnection},
+ #?MODULE{consumers = Cons0,
+ enqueuers = Enqs0} = State0) ->
+ %% A node has been disconnected. This doesn't necessarily mean that
+ %% any processes on this node are down, they _may_ come back so here
+ %% we just mark them as suspected (effectively deactivated)
+ %% and return all checked out messages to the main queue for delivery to any
+ %% live consumers
+ %%
+ %% all pids for the disconnected node will be marked as suspected not just
+ %% the one we got the `down' command for
+ Node = node(Pid),
+
+ {State, Effects1} =
+ maps:fold(
+ fun({_, P} = Cid, #consumer{checked_out = Checked0,
+ status = up} = C0,
+ {St0, Eff}) when node(P) =:= Node ->
+ Credit = increase_credit(C0, map_size(Checked0)),
+ C = C0#consumer{status = suspected_down,
+ credit = Credit},
+ {St, Eff0} = return_all(Meta, St0, Eff, Cid, C),
+ Eff1 = consumer_update_active_effects(St, Cid, C, false,
+ suspected_down, Eff0),
+ {St, Eff1};
+ (_, _, {St, Eff}) ->
+ {St, Eff}
+ end, {State0, []}, Cons0),
+ Enqs = maps:map(fun(P, E) when node(P) =:= Node ->
+ E#enqueuer{status = suspected_down};
+ (_, E) -> E
+ end, Enqs0),
+
+ % Monitor the node so that we can "unsuspect" these processes when the node
+ % comes back, then re-issue all monitors and discover the final fate of
+ % these processes
+ Effects = case maps:size(State#?MODULE.consumers) of
+ 0 ->
+ [{aux, inactive}, {monitor, node, Node}];
+ _ ->
+ [{monitor, node, Node}]
+ end ++ Effects1,
+ checkout(Meta, State0, State#?MODULE{enqueuers = Enqs,
+ last_active = Ts}, Effects);
+apply(Meta, {down, Pid, _Info}, State0) ->
+ {State, Effects} = handle_down(Meta, Pid, State0),
+ checkout(Meta, State0, State, Effects);
+apply(Meta, {nodeup, Node}, #?MODULE{consumers = Cons0,
+ enqueuers = Enqs0,
+ service_queue = _SQ0} = State0) ->
+ %% A node we are monitoring has come back.
+ %% If we have suspected any processes of being
+ %% down we should now re-issue the monitors for them to detect if they're
+ %% actually down or not
+ Monitors = [{monitor, process, P}
+ || P <- suspected_pids_for(Node, State0)],
+
+ Enqs1 = maps:map(fun(P, E) when node(P) =:= Node ->
+ E#enqueuer{status = up};
+ (_, E) -> E
+ end, Enqs0),
+ ConsumerUpdateActiveFun = consumer_active_flag_update_function(State0),
+ %% mark all consumers as up
+ {State1, Effects1} =
+ maps:fold(fun({_, P} = ConsumerId, C, {SAcc, EAcc})
+ when (node(P) =:= Node) and
+ (C#consumer.status =/= cancelled) ->
+ EAcc1 = ConsumerUpdateActiveFun(SAcc, ConsumerId,
+ C, true, up, EAcc),
+ {update_or_remove_sub(Meta, ConsumerId,
+ C#consumer{status = up},
+ SAcc), EAcc1};
+ (_, _, Acc) ->
+ Acc
+ end, {State0, Monitors}, Cons0),
+ Waiting = update_waiting_consumer_status(Node, State1, up),
+ State2 = State1#?MODULE{
+ enqueuers = Enqs1,
+ waiting_consumers = Waiting},
+ {State, Effects} = activate_next_consumer(State2, Effects1),
+ checkout(Meta, State0, State, Effects);
+apply(_, {nodedown, _Node}, State) ->
+ {State, ok};
+apply(Meta, #purge_nodes{nodes = Nodes}, State0) ->
+ {State, Effects} = lists:foldl(fun(Node, {S, E}) ->
+ purge_node(Meta, Node, S, E)
+ end, {State0, []}, Nodes),
+ {State, ok, Effects};
+apply(Meta, #update_config{config = Conf}, State) ->
+ checkout(Meta, State, update_config(Conf, State), []);
+apply(_Meta, {machine_version, 0, 1}, V0State) ->
+ State = convert_v0_to_v1(V0State),
+ {State, ok, []}.
+
+%% Machine-version upgrade: map a rabbit_fifo_v0 state onto the v1 record.
+%% Messages move from a map to an lqueue, each consumer gains a trailing
+%% priority field (appended as element 0) and the service queue becomes a
+%% priority queue.
+convert_v0_to_v1(V0State0) ->
+ V0State = rabbit_fifo_v0:normalize_for_v1(V0State0),
+ V0Msgs = rabbit_fifo_v0:get_field(messages, V0State),
+ V1Msgs = lqueue:from_list(lists:sort(maps:to_list(V0Msgs))),
+ V0Enqs = rabbit_fifo_v0:get_field(enqueuers, V0State),
+ V1Enqs = maps:map(
+ fun (_EPid, E) ->
+ #enqueuer{next_seqno = element(2, E),
+ pending = element(3, E),
+ status = element(4, E)}
+ end, V0Enqs),
+ V0Cons = rabbit_fifo_v0:get_field(consumers, V0State),
+ V1Cons = maps:map(
+ fun (_CId, C0) ->
+ %% add the priority field
+ list_to_tuple(tuple_to_list(C0) ++ [0])
+ end, V0Cons),
+ V0SQ = rabbit_fifo_v0:get_field(service_queue, V0State),
+ V1SQ = priority_queue:from_list(queue:to_list(V0SQ)),
+ Cfg = #cfg{name = rabbit_fifo_v0:get_cfg_field(name, V0State),
+ resource = rabbit_fifo_v0:get_cfg_field(resource, V0State),
+ release_cursor_interval = rabbit_fifo_v0:get_cfg_field(release_cursor_interval, V0State),
+ dead_letter_handler = rabbit_fifo_v0:get_cfg_field(dead_letter_handler, V0State),
+ become_leader_handler = rabbit_fifo_v0:get_cfg_field(become_leader_handler, V0State),
+ %% TODO: what if policy enabling reject_publish was applied before conversion?
+ overflow_strategy = drop_head,
+ max_length = rabbit_fifo_v0:get_cfg_field(max_length, V0State),
+ max_bytes = rabbit_fifo_v0:get_cfg_field(max_bytes, V0State),
+ consumer_strategy = rabbit_fifo_v0:get_cfg_field(consumer_strategy, V0State),
+ delivery_limit = rabbit_fifo_v0:get_cfg_field(delivery_limit, V0State),
+ max_in_memory_length = rabbit_fifo_v0:get_cfg_field(max_in_memory_length, V0State),
+ max_in_memory_bytes = rabbit_fifo_v0:get_cfg_field(max_in_memory_bytes, V0State)
+ },
+
+ #?MODULE{cfg = Cfg,
+ messages = V1Msgs,
+ next_msg_num = rabbit_fifo_v0:get_field(next_msg_num, V0State),
+ returns = rabbit_fifo_v0:get_field(returns, V0State),
+ enqueue_count = rabbit_fifo_v0:get_field(enqueue_count, V0State),
+ enqueuers = V1Enqs,
+ ra_indexes = rabbit_fifo_v0:get_field(ra_indexes, V0State),
+ release_cursors = rabbit_fifo_v0:get_field(release_cursors, V0State),
+ consumers = V1Cons,
+ service_queue = V1SQ,
+ prefix_msgs = rabbit_fifo_v0:get_field(prefix_msgs, V0State),
+ msg_bytes_enqueue = rabbit_fifo_v0:get_field(msg_bytes_enqueue, V0State),
+ msg_bytes_checkout = rabbit_fifo_v0:get_field(msg_bytes_checkout, V0State),
+ waiting_consumers = rabbit_fifo_v0:get_field(waiting_consumers, V0State),
+ msg_bytes_in_memory = rabbit_fifo_v0:get_field(msg_bytes_in_memory, V0State),
+ msgs_ready_in_memory = rabbit_fifo_v0:get_field(msgs_ready_in_memory, V0State)
+ }.
+
+%% Treat every known pid (consumer or enqueuer) on Node as down,
+%% accumulating the resulting state changes and effects.
+purge_node(Meta, Node, State0, Effects0) ->
+ Pids = all_pids_for(Node, State0),
+ lists:foldl(fun(Pid, {StateAcc, EffectsAcc}) ->
+ {State, Effects} = handle_down(Meta, Pid, StateAcc),
+ {State, EffectsAcc ++ Effects}
+ end, {State0, Effects0}, Pids).
+
+%% any downs that are not noconnection
+%% The pid is gone for good: flush its pending enqueues back onto the
+%% queue, drop it as an enqueuer and cancel all of its consumers.
+handle_down(Meta, Pid, #?MODULE{consumers = Cons0,
+ enqueuers = Enqs0} = State0) ->
+ % Remove any enqueuer for the same pid and enqueue any pending messages
+ % This should be ok as we won't see any more enqueues from this pid
+ State1 = case maps:take(Pid, Enqs0) of
+ {#enqueuer{pending = Pend}, Enqs} ->
+ lists:foldl(fun ({_, RIdx, RawMsg}, S) ->
+ enqueue(RIdx, RawMsg, S)
+ end, State0#?MODULE{enqueuers = Enqs}, Pend);
+ error ->
+ State0
+ end,
+ {Effects1, State2} = handle_waiting_consumer_down(Pid, State1),
+ % return checked out messages to main queue
+ % Find the consumers for the down pid
+ DownConsumers = maps:keys(
+ maps:filter(fun({_, P}, _) -> P =:= Pid end, Cons0)),
+ lists:foldl(fun(ConsumerId, {S, E}) ->
+ cancel_consumer(Meta, ConsumerId, S, E, down)
+ end, {State2, Effects1}, DownConsumers).
+
+%% Returns a fun used to emit (or suppress) consumer-active-status change
+%% effects: competing consumers report status changes, whereas in
+%% single-active mode activation is handled elsewhere so this is a no-op.
+consumer_active_flag_update_function(#?MODULE{cfg = #cfg{consumer_strategy = competing}}) ->
+ fun(State, ConsumerId, Consumer, Active, ActivityStatus, Effects) ->
+ consumer_update_active_effects(State, ConsumerId, Consumer, Active,
+ ActivityStatus, Effects)
+ end;
+consumer_active_flag_update_function(#?MODULE{cfg = #cfg{consumer_strategy = single_active}}) ->
+ fun(_, _, _, _, _, Effects) ->
+ Effects
+ end.
+
+%% Remove any *waiting* (single-active mode) consumers belonging to the
+%% down pid, emitting cancel effects for them. No-op for competing mode
+%% or when there are no waiting consumers.
+handle_waiting_consumer_down(_Pid,
+ #?MODULE{cfg = #cfg{consumer_strategy = competing}} = State) ->
+ {[], State};
+handle_waiting_consumer_down(_Pid,
+ #?MODULE{cfg = #cfg{consumer_strategy = single_active},
+ waiting_consumers = []} = State) ->
+ {[], State};
+handle_waiting_consumer_down(Pid,
+ #?MODULE{cfg = #cfg{consumer_strategy = single_active},
+ waiting_consumers = WaitingConsumers0} = State0) ->
+ % get cancel effects for down waiting consumers
+ Down = lists:filter(fun({{_, P}, _}) -> P =:= Pid end,
+ WaitingConsumers0),
+ Effects = lists:foldl(fun ({ConsumerId, _}, Effects) ->
+ cancel_consumer_effects(ConsumerId, State0,
+ Effects)
+ end, [], Down),
+ % update state to have only up waiting consumers
+ StillUp = lists:filter(fun({{_, P}, _}) -> P =/= Pid end,
+ WaitingConsumers0),
+ State = State0#?MODULE{waiting_consumers = StillUp},
+ {Effects, State}.
+
+%% Rebuild the waiting-consumers list, setting Status on every consumer
+%% whose channel pid lives on Node and dropping cancelled entries.
+update_waiting_consumer_status(Node,
+ #?MODULE{waiting_consumers = WaitingConsumers},
+ Status) ->
+ [begin
+ case node(Pid) of
+ Node ->
+ {ConsumerId, Consumer#consumer{status = Status}};
+ _ ->
+ {ConsumerId, Consumer}
+ end
+ end || {{_, Pid} = ConsumerId, Consumer} <- WaitingConsumers,
+ Consumer#consumer.status =/= cancelled].
+
+%% ra_machine callback: effects to emit when this member changes Raft state.
+%% * leader: (re)monitor all known enqueuer/consumer pids and their nodes,
+%%   notify them of the leader change, take the leader file-handle
+%%   reservation and invoke any configured become-leader handler.
+%% * eol: tell every client process the queue is gone and release the
+%%   file-handle reservation.
+%% * any other state with a valid machine state: take the non-leader
+%%   file-handle reservation.
+-spec state_enter(ra_server:ra_state(), state()) -> ra_machine:effects().
+state_enter(leader, #?MODULE{consumers = Cons,
+ enqueuers = Enqs,
+ waiting_consumers = WaitingConsumers,
+ cfg = #cfg{name = Name,
+ resource = Resource,
+ become_leader_handler = BLH},
+ prefix_msgs = {0, [], 0, []}
+ }) ->
+ % return effects to monitor all current consumers and enqueuers
+ Pids = lists:usort(maps:keys(Enqs)
+ ++ [P || {_, P} <- maps:keys(Cons)]
+ ++ [P || {{_, P}, _} <- WaitingConsumers]),
+ Mons = [{monitor, process, P} || P <- Pids],
+ Nots = [{send_msg, P, leader_change, ra_event} || P <- Pids],
+ NodeMons = lists:usort([{monitor, node, node(P)} || P <- Pids]),
+ FHReservation = [{mod_call, rabbit_quorum_queue, file_handle_leader_reservation, [Resource]}],
+ Effects = Mons ++ Nots ++ NodeMons ++ FHReservation,
+ case BLH of
+ undefined ->
+ Effects;
+ {Mod, Fun, Args} ->
+ %% the configured become-leader handler runs first
+ [{mod_call, Mod, Fun, Args ++ [Name]} | Effects]
+ end;
+state_enter(eol, #?MODULE{enqueuers = Enqs,
+ consumers = Custs0,
+ waiting_consumers = WaitingConsumers0}) ->
+ Custs = maps:fold(fun({_, P}, V, S) -> S#{P => V} end, #{}, Custs0),
+ WaitingConsumers1 = lists:foldl(fun({{_, P}, V}, Acc) -> Acc#{P => V} end,
+ #{}, WaitingConsumers0),
+ AllConsumers = maps:merge(Custs, WaitingConsumers1),
+ [{send_msg, P, eol, ra_event}
+ || P <- maps:keys(maps:merge(Enqs, AllConsumers))] ++
+ [{mod_call, rabbit_quorum_queue, file_handle_release_reservation, []}];
+state_enter(State, #?MODULE{cfg = #cfg{resource = _Resource}}) when State =/= leader ->
+ FHReservation = {mod_call, rabbit_quorum_queue, file_handle_other_reservation, []},
+ [FHReservation];
+state_enter(_, _) ->
+ %% catch all as not handling all states
+ [].
+
+
+%% ra_machine callback: periodic tick. If the queue's expiry deadline has
+%% passed, ask for the queue to be deleted; otherwise publish current
+%% queue metrics via rabbit_quorum_queue:handle_tick/3.
+-spec tick(non_neg_integer(), state()) -> ra_machine:effects().
+tick(Ts, #?MODULE{cfg = #cfg{name = Name,
+ resource = QName},
+ msg_bytes_enqueue = EnqueueBytes,
+ msg_bytes_checkout = CheckoutBytes} = State) ->
+ case is_expired(Ts, State) of
+ true ->
+ [{mod_call, rabbit_quorum_queue, spawn_deleter, [QName]}];
+ false ->
+ Metrics = {Name,
+ messages_ready(State),
+ num_checked_out(State), % checked out
+ messages_total(State),
+ query_consumer_count(State), % Consumers
+ EnqueueBytes,
+ CheckoutBytes},
+ [{mod_call, rabbit_quorum_queue,
+ handle_tick, [QName, Metrics, all_nodes(State)]}]
+ end.
+
+%% Build a human/CLI-oriented summary map of the machine state: effective
+%% configuration plus counts of consumers, enqueuers, messages and
+%% release cursors.
+-spec overview(state()) -> map().
+overview(#?MODULE{consumers = Cons,
+ enqueuers = Enqs,
+ release_cursors = Cursors,
+ enqueue_count = EnqCount,
+ msg_bytes_enqueue = EnqueueBytes,
+ msg_bytes_checkout = CheckoutBytes,
+ cfg = Cfg} = State) ->
+ Conf = #{name => Cfg#cfg.name,
+ resource => Cfg#cfg.resource,
+ release_cursor_interval => Cfg#cfg.release_cursor_interval,
+ dead_lettering_enabled => undefined =/= Cfg#cfg.dead_letter_handler,
+ max_length => Cfg#cfg.max_length,
+ max_bytes => Cfg#cfg.max_bytes,
+ consumer_strategy => Cfg#cfg.consumer_strategy,
+ max_in_memory_length => Cfg#cfg.max_in_memory_length,
+ max_in_memory_bytes => Cfg#cfg.max_in_memory_bytes,
+ expires => Cfg#cfg.expires,
+ delivery_limit => Cfg#cfg.delivery_limit
+ },
+ #{type => ?MODULE,
+ config => Conf,
+ num_consumers => maps:size(Cons),
+ num_checked_out => num_checked_out(State),
+ num_enqueuers => maps:size(Enqs),
+ num_ready_messages => messages_ready(State),
+ num_messages => messages_total(State),
+ num_release_cursors => lqueue:len(Cursors),
+ release_cursors => [I || {_, I, _} <- lqueue:to_list(Cursors)],
+ release_cursor_enqueue_counter => EnqCount,
+ enqueue_message_bytes => EnqueueBytes,
+ checkout_message_bytes => CheckoutBytes}.
+
+%% Return the delivery messages checked out to Cid whose msg_ids fall in
+%% the inclusive range From..To; empty list for an unknown consumer.
+-spec get_checked_out(consumer_id(), msg_id(), msg_id(), state()) ->
+ [delivery_msg()].
+get_checked_out(Cid, From, To, #?MODULE{consumers = Consumers}) ->
+ case Consumers of
+ #{Cid := #consumer{checked_out = Checked}} ->
+ [{K, snd(snd(maps:get(K, Checked)))}
+ || K <- lists:seq(From, To),
+ maps:is_key(K, Checked)];
+ _ ->
+ []
+ end.
+
+%% Machine versioning: this module implements version 1; version 0 states
+%% are handled by the frozen rabbit_fifo_v0 module until upgraded.
+-spec version() -> pos_integer().
+version() -> 1.
+
+which_module(0) -> rabbit_fifo_v0;
+which_module(1) -> ?MODULE.
+
+-record(aux_gc, {last_raft_idx = 0 :: ra:index()}).
+-record(aux, {name :: atom(),
+ utilisation :: term(),
+ gc = #aux_gc{} :: #aux_gc{}}).
+
+%% Initialise the auxiliary (non-replicated) state: make sure the shared
+%% usage ETS table exists and start with an 'inactive' utilisation sample.
+init_aux(Name) when is_atom(Name) ->
+ %% TODO: catch specific exception throw if table already exists
+ ok = ra_machine_ets:create_table(rabbit_fifo_usage,
+ [named_table, set, public,
+ {write_concurrency, true}]),
+ Now = erlang:monotonic_time(micro_seconds),
+ #aux{name = Name,
+ utilisation = {inactive, Now, 1, 1.0}}.
+
+%% Handle auxiliary commands that must not affect the replicated state:
+%% WAL roll-over / GC requests, active/inactive utilisation samples, the
+%% periodic tick (publishes usage to ETS) and 'peek' calls which may need
+%% to re-read the raw message from the ra log.
+handle_aux(leader, _, garbage_collection, State, Log, _MacState) ->
+ ra_log_wal:force_roll_over(ra_log_wal),
+ {no_reply, State, Log};
+handle_aux(follower, _, garbage_collection, State, Log, MacState) ->
+ ra_log_wal:force_roll_over(ra_log_wal),
+ {no_reply, force_eval_gc(Log, MacState, State), Log};
+handle_aux(_RaState, cast, eval, Aux0, Log, _MacState) ->
+ {no_reply, Aux0, Log};
+handle_aux(_RaState, cast, Cmd, #aux{utilisation = Use0} = Aux0,
+ Log, _MacState)
+ when Cmd == active orelse Cmd == inactive ->
+ {no_reply, Aux0#aux{utilisation = update_use(Use0, Cmd)}, Log};
+handle_aux(_RaState, cast, tick, #aux{name = Name,
+ utilisation = Use0} = State0,
+ Log, MacState) ->
+ true = ets:insert(rabbit_fifo_usage,
+ {Name, utilisation(Use0)}),
+ Aux = eval_gc(Log, MacState, State0),
+ {no_reply, Aux, Log};
+handle_aux(_RaState, {call, _From}, {peek, Pos}, Aux0,
+ Log0, MacState) ->
+ case rabbit_fifo:query_peek(Pos, MacState) of
+ {ok, {Idx, {Header, empty}}} ->
+ %% need to re-hydrate from the log
+ {{_, _, {_, _, Cmd, _}}, Log} = ra_log:fetch(Idx, Log0),
+ #enqueue{msg = Msg} = Cmd,
+ {reply, {ok, {Header, Msg}}, Aux0, Log};
+ {ok, {_Idx, {Header, Msg}}} ->
+ {reply, {ok, {Header, Msg}}, Aux0, Log0};
+ Err ->
+ {reply, Err, Aux0, Log0}
+ end.
+
+
+%% Run a full garbage_collect() of this process when the queue is empty,
+%% memory use exceeds ?GC_MEM_LIMIT_B and the log has advanced since the
+%% last sweep; records the index so the sweep isn't repeated needlessly.
+eval_gc(Log, #?MODULE{cfg = #cfg{resource = QR}} = MacState,
+ #aux{gc = #aux_gc{last_raft_idx = LastGcIdx} = Gc} = AuxState) ->
+ {Idx, _} = ra_log:last_index_term(Log),
+ {memory, Mem} = erlang:process_info(self(), memory),
+ case messages_total(MacState) of
+ 0 when Idx > LastGcIdx andalso
+ Mem > ?GC_MEM_LIMIT_B ->
+ garbage_collect(),
+ {memory, MemAfter} = erlang:process_info(self(), memory),
+ rabbit_log:debug("~s: full GC sweep complete. "
+ "Process memory changed from ~.2fMB to ~.2fMB.",
+ [rabbit_misc:rs(QR), Mem/?MB, MemAfter/?MB]),
+ AuxState#aux{gc = Gc#aux_gc{last_raft_idx = Idx}};
+ _ ->
+ AuxState
+ end.
+
+%% Like eval_gc/3 but unconditional on queue length and memory use: sweep
+%% whenever the log has advanced past the last recorded GC index.
+force_eval_gc(Log, #?MODULE{cfg = #cfg{resource = QR}},
+ #aux{gc = #aux_gc{last_raft_idx = LastGcIdx} = Gc} = AuxState) ->
+ {Idx, _} = ra_log:last_index_term(Log),
+ {memory, Mem} = erlang:process_info(self(), memory),
+ case Idx > LastGcIdx of
+ true ->
+ garbage_collect(),
+ {memory, MemAfter} = erlang:process_info(self(), memory),
+ rabbit_log:debug("~s: full GC sweep complete. "
+ "Process memory changed from ~.2fMB to ~.2fMB.",
+ [rabbit_misc:rs(QR), Mem/?MB, MemAfter/?MB]),
+ AuxState#aux{gc = Gc#aux_gc{last_raft_idx = Idx}};
+ false ->
+ AuxState
+ end.
+
+%%% Queries
+%%% Read-only functions run against a (possibly remote) machine state,
+%%% typically via ra consistent/local queries.
+
+query_messages_ready(State) ->
+ messages_ready(State).
+
+%% total number of messages currently checked out across all consumers
+query_messages_checked_out(#?MODULE{consumers = Consumers}) ->
+ maps:fold(fun (_, #consumer{checked_out = C}, S) ->
+ maps:size(C) + S
+ end, 0, Consumers).
+
+query_messages_total(State) ->
+ messages_total(State).
+
+%% all distinct enqueuer and consumer pids known to the queue
+query_processes(#?MODULE{enqueuers = Enqs, consumers = Cons0}) ->
+ Cons = maps:fold(fun({_, P}, V, S) -> S#{P => V} end, #{}, Cons0),
+ maps:keys(maps:merge(Enqs, Cons)).
+
+
+query_ra_indexes(#?MODULE{ra_indexes = RaIndexes}) ->
+ RaIndexes.
+
+%% attached consumers that are not suspected down, plus waiting consumers
+query_consumer_count(#?MODULE{consumers = Consumers,
+ waiting_consumers = WaitingConsumers}) ->
+ Up = maps:filter(fun(_ConsumerId, #consumer{status = Status}) ->
+ Status =/= suspected_down
+ end, Consumers),
+ maps:size(Up) + length(WaitingConsumers).
+
+%% Build a map of ConsumerId => consumer info tuple for all attached and
+%% waiting consumers. The active/activity-status pair is derived per
+%% consumer strategy: competing consumers are active unless suspected
+%% down; in single-active mode only the single active consumer is active.
+query_consumers(#?MODULE{consumers = Consumers,
+ waiting_consumers = WaitingConsumers,
+ cfg = #cfg{consumer_strategy = ConsumerStrategy}} = State) ->
+ ActiveActivityStatusFun =
+ case ConsumerStrategy of
+ competing ->
+ fun(_ConsumerId,
+ #consumer{status = Status}) ->
+ case Status of
+ suspected_down ->
+ {false, Status};
+ _ ->
+ {true, Status}
+ end
+ end;
+ single_active ->
+ SingleActiveConsumer = query_single_active_consumer(State),
+ fun({Tag, Pid} = _Consumer, _) ->
+ case SingleActiveConsumer of
+ {value, {Tag, Pid}} ->
+ {true, single_active};
+ _ ->
+ {false, waiting}
+ end
+ end
+ end,
+ FromConsumers =
+ maps:fold(fun (_, #consumer{status = cancelled}, Acc) ->
+ Acc;
+ ({Tag, Pid}, #consumer{meta = Meta} = Consumer, Acc) ->
+ {Active, ActivityStatus} =
+ ActiveActivityStatusFun({Tag, Pid}, Consumer),
+ maps:put({Tag, Pid},
+ {Pid, Tag,
+ maps:get(ack, Meta, undefined),
+ maps:get(prefetch, Meta, undefined),
+ Active,
+ ActivityStatus,
+ maps:get(args, Meta, []),
+ maps:get(username, Meta, undefined)},
+ Acc)
+ end, #{}, Consumers),
+ FromWaitingConsumers =
+ lists:foldl(fun ({_, #consumer{status = cancelled}}, Acc) ->
+ Acc;
+ ({{Tag, Pid}, #consumer{meta = Meta} = Consumer}, Acc) ->
+ {Active, ActivityStatus} =
+ ActiveActivityStatusFun({Tag, Pid}, Consumer),
+ maps:put({Tag, Pid},
+ {Pid, Tag,
+ maps:get(ack, Meta, undefined),
+ maps:get(prefetch, Meta, undefined),
+ Active,
+ ActivityStatus,
+ maps:get(args, Meta, []),
+ maps:get(username, Meta, undefined)},
+ Acc)
+ end, #{}, WaitingConsumers),
+ maps:merge(FromConsumers, FromWaitingConsumers).
+
+
+%% Returns {value, ConsumerId} when single-active-consumer mode is on and
+%% exactly one consumer is attached, an error tuple for zero or more than
+%% one, and 'disabled' when the queue is not in single-active mode.
+query_single_active_consumer(#?MODULE{cfg = #cfg{consumer_strategy = single_active},
+ consumers = Consumers}) ->
+ case maps:to_list(Consumers) of
+ [] ->
+ {error, no_value};
+ [{ConsumerId, _}] ->
+ {value, ConsumerId};
+ _ ->
+ {error, illegal_size}
+ end;
+query_single_active_consumer(_) ->
+ disabled.
+
+%% {ready message count, attached consumer count}
+query_stat(#?MODULE{consumers = Consumers} = State) ->
+ {messages_ready(State), maps:size(Consumers)}.
+
+query_in_memory_usage(#?MODULE{msg_bytes_in_memory = Bytes,
+ msgs_ready_in_memory = Length}) ->
+ {Length, Bytes}.
+
+%% Peek at the message in queue position Pos (1-based) without consuming
+%% it, by repeatedly taking from a throwaway copy of the state.
+query_peek(Pos, State0) when Pos > 0 ->
+ case take_next_msg(State0) of
+ empty ->
+ {error, no_message_at_pos};
+ {{_Seq, IdxMsg}, _State}
+ when Pos == 1 ->
+ {ok, IdxMsg};
+ {_Msg, State} ->
+ query_peek(Pos-1, State)
+ end.
+
+
+-spec usage(atom()) -> float().
+usage(Name) when is_atom(Name) ->
+ case ets:lookup(rabbit_fifo_usage, Name) of
+ [] -> 0.0;
+ [{_, Use}] -> Use
+ end.
+
+%%% Internal
+
+%% Number of messages ready for delivery: queued + returned + the
+%% counts of prefix returns/messages carried over from a snapshot.
+messages_ready(#?MODULE{messages = M,
+                        prefix_msgs = {RCnt, _R, PCnt, _P},
+                        returns = R}) ->
+    %% prefix messages will rarely have anything in them during normal
+    %% operations; their counts (RCnt/PCnt) are tracked alongside the
+    %% lists so no traversal is needed here
+    lqueue:len(M) + lqueue:len(R) + RCnt + PCnt.
+
+%% Total messages known to the state machine, including checked-out
+%% ones (every live message has an entry in ra_indexes).
+messages_total(#?MODULE{ra_indexes = I,
+                        prefix_msgs = {RCnt, _R, PCnt, _P}}) ->
+    rabbit_fifo_index:size(I) + RCnt + PCnt.
+
+%% Folds an active/inactive transition into the usage-tracking tuple:
+%% {active, Since, Avg} | {inactive, Since, ActiveDuration, Avg}.
+%% Same-state transitions are no-ops; a state change stamps the current
+%% monotonic time and folds the elapsed span into the moving average.
+update_use({inactive, _, _, _} = CUInfo, inactive) ->
+    CUInfo;
+update_use({active, _, _} = CUInfo, active) ->
+    CUInfo;
+update_use({active, Since, Avg}, inactive) ->
+    Now = erlang:monotonic_time(micro_seconds),
+    {inactive, Now, Now - Since, Avg};
+update_use({inactive, Since, Active, Avg}, active) ->
+    Now = erlang:monotonic_time(micro_seconds),
+    {active, Now, use_avg(Active, Now - Since, Avg)}.
+
+%% Current utilisation estimate, extending the open interval up to "now".
+utilisation({active, Since, Avg}) ->
+    use_avg(erlang:monotonic_time(micro_seconds) - Since, 0, Avg);
+utilisation({inactive, Since, Active, Avg}) ->
+    use_avg(Active, erlang:monotonic_time(micro_seconds) - Since, Avg).
+
+%% Folds an {ActiveMicros, InactiveMicros} span into the running average.
+use_avg(0, 0, Avg) ->
+    Avg;
+use_avg(Active, Inactive, Avg) ->
+    Time = Inactive + Active,
+    moving_average(Time, ?USE_AVG_HALF_LIFE, Active / Time, Avg).
+
+%% Exponential moving average with the given half-life: the weight of
+%% the previous value decays by half every HalfLife time units.
+moving_average(_Time, _, Next, undefined) ->
+    Next;
+moving_average(Time, HalfLife, Next, Current) ->
+    Weight = math:exp(Time * math:log(0.5) / HalfLife),
+    Next * (1 - Weight) + Current * Weight.
+
+%% Total number of checked-out (unacknowledged) messages across all
+%% consumers.
+num_checked_out(#?MODULE{consumers = Cons}) ->
+    maps:fold(fun (_, #consumer{checked_out = C}, Acc) ->
+                      maps:size(C) + Acc
+              end, 0, Cons).
+
+%% Cancels a consumer. For the competing strategy, or for single-active
+%% with no waiting consumers, this is a plain cancellation; for
+%% single-active with waiting consumers the next waiting consumer may be
+%% promoted to active.
+cancel_consumer(Meta, ConsumerId,
+                #?MODULE{cfg = #cfg{consumer_strategy = competing}} = State,
+                Effects, Reason) ->
+    cancel_consumer0(Meta, ConsumerId, State, Effects, Reason);
+cancel_consumer(Meta, ConsumerId,
+                #?MODULE{cfg = #cfg{consumer_strategy = single_active},
+                         waiting_consumers = []} = State,
+                Effects, Reason) ->
+    %% single active consumer on, no consumers are waiting
+    cancel_consumer0(Meta, ConsumerId, State, Effects, Reason);
+cancel_consumer(Meta, ConsumerId,
+                #?MODULE{consumers = Cons0,
+                         cfg = #cfg{consumer_strategy = single_active},
+                         waiting_consumers = Waiting0} = State0,
+                Effects0, Reason) ->
+    %% single active consumer on, consumers are waiting
+    case maps:is_key(ConsumerId, Cons0) of
+        true ->
+            % The active consumer is to be removed
+            {State1, Effects1} = cancel_consumer0(Meta, ConsumerId, State0,
+                                                  Effects0, Reason),
+            activate_next_consumer(State1, Effects1);
+        false ->
+            % The cancelled consumer is not active or cancelled
+            % Just remove it from idle_consumers
+            Waiting = lists:keydelete(ConsumerId, 1, Waiting0),
+            Effects = cancel_consumer_effects(ConsumerId, State0, Effects0),
+            % A waiting consumer isn't supposed to have any checked out messages,
+            % so nothing special to do here
+            {State0#?MODULE{waiting_consumers = Waiting}, Effects}
+    end.
+
+%% Emits a mod_call effect notifying rabbit_quorum_queue that a
+%% consumer's active/activity status changed.
+consumer_update_active_effects(#?MODULE{cfg = #cfg{resource = QName}},
+                               ConsumerId, #consumer{meta = Meta},
+                               Active, ActivityStatus,
+                               Effects) ->
+    Ack = maps:get(ack, Meta, undefined),
+    Prefetch = maps:get(prefetch, Meta, undefined),
+    Args = maps:get(args, Meta, []),
+    [{mod_call, rabbit_quorum_queue, update_consumer_handler,
+      [QName, ConsumerId, false, Ack, Prefetch, Active, ActivityStatus, Args]}
+     | Effects].
+
+%% Common cancellation path: returns/settles the consumer's checked-out
+%% messages (depending on Reason) and emits the cancellation effects.
+%% Adds an {aux, inactive} effect when the last consumer goes away.
+cancel_consumer0(Meta, ConsumerId,
+                 #?MODULE{consumers = C0} = S0, Effects0, Reason) ->
+    case C0 of
+        #{ConsumerId := Consumer} ->
+            {S, Effects2} = maybe_return_all(Meta, ConsumerId, Consumer,
+                                             S0, Effects0, Reason),
+            %% The effects are emitted before the consumer is actually removed
+            %% if the consumer has unacked messages. This is a bit weird but
+            %% in line with what classic queues do (from an external point of
+            %% view)
+            Effects = cancel_consumer_effects(ConsumerId, S, Effects2),
+            case maps:size(S#?MODULE.consumers) of
+                0 ->
+                    {S, [{aux, inactive} | Effects]};
+                _ ->
+                    {S, Effects}
+            end;
+        _ ->
+            %% already removed: do nothing
+            {S0, Effects0}
+    end.
+
+%% Single-active-consumer only: if no 'up' consumer remains in the
+%% consumers map, promotes the first 'up' waiting consumer (FIFO order)
+%% to active and emits the corresponding status-change effect.
+activate_next_consumer(#?MODULE{consumers = Cons,
+                                waiting_consumers = Waiting0} = State0,
+                       Effects0) ->
+    case maps:filter(fun (_, #consumer{status = S}) -> S == up end, Cons) of
+        Up when map_size(Up) == 0 ->
+            %% there are no active consumer in the consumer map
+            case lists:filter(fun ({_, #consumer{status = Status}}) ->
+                                      Status == up
+                              end, Waiting0) of
+                [{NextConsumerId, NextConsumer} | _] ->
+                    %% there is a potential next active consumer
+                    Remaining = lists:keydelete(NextConsumerId, 1, Waiting0),
+                    #?MODULE{service_queue = ServiceQueue} = State0,
+                    ServiceQueue1 = maybe_queue_consumer(NextConsumerId,
+                                                         NextConsumer,
+                                                         ServiceQueue),
+                    State = State0#?MODULE{consumers = Cons#{NextConsumerId => NextConsumer},
+                                           service_queue = ServiceQueue1,
+                                           waiting_consumers = Remaining},
+                    Effects = consumer_update_active_effects(State, NextConsumerId,
+                                                             NextConsumer, true,
+                                                             single_active, Effects0),
+                    {State, Effects};
+                [] ->
+                    {State0, [{aux, inactive} | Effects0]}
+            end;
+        _ ->
+            {State0, Effects0}
+    end.
+
+
+
+%% On consumer_cancel: mark the consumer cancelled (checked-out messages
+%% are kept until settled). On 'down': return all checked-out messages
+%% to the queue and drop the consumer entirely.
+maybe_return_all(#{system_time := Ts} = Meta, ConsumerId, Consumer, S0, Effects0, Reason) ->
+    case Reason of
+        consumer_cancel ->
+            {update_or_remove_sub(Meta, ConsumerId,
+                                  Consumer#consumer{lifetime = once,
+                                                    credit = 0,
+                                                    status = cancelled},
+                                  S0), Effects0};
+        down ->
+            {S1, Effects1} = return_all(Meta, S0, Effects0, ConsumerId, Consumer),
+            {S1#?MODULE{consumers = maps:remove(ConsumerId, S1#?MODULE.consumers),
+                        last_active = Ts},
+             Effects1}
+    end.
+
+%% Applies an enqueue command: records the message (or detects a
+%% duplicate via the enqueuer's sequence number), runs checkout, and
+%% possibly stashes a dehydrated release-cursor state at this raft index.
+apply_enqueue(#{index := RaftIdx} = Meta, From, Seq, RawMsg, State0) ->
+    case maybe_enqueue(RaftIdx, From, Seq, RawMsg, [], State0) of
+        {ok, State1, Effects1} ->
+            State2 = append_to_master_index(RaftIdx, State1),
+            %% note: State0 (the pre-enqueue state) is passed as the
+            %% "before" state for soft-limit evaluation inside checkout
+            {State, ok, Effects} = checkout(Meta, State0, State2, Effects1),
+            {maybe_store_dehydrated_state(RaftIdx, State), ok, Effects};
+        {duplicate, State, Effects} ->
+            {State, ok, Effects}
+    end.
+
+%% Drops the message at the head of the queue (max-length/max-bytes
+%% drop_head overflow), adjusting byte/in-memory counters and emitting
+%% dead-letter effects for fully indexed messages.
+drop_head(#?MODULE{ra_indexes = Indexes0} = State0, Effects0) ->
+    case take_next_msg(State0) of
+        {FullMsg = {_MsgId, {RaftIdxToDrop, {Header, Msg}}},
+         State1} ->
+            Indexes = rabbit_fifo_index:delete(RaftIdxToDrop, Indexes0),
+            State2 = add_bytes_drop(Header, State1#?MODULE{ra_indexes = Indexes}),
+            %% 'empty' means the body was evicted from memory, so there
+            %% are no in-memory counts to subtract
+            State = case Msg of
+                        'empty' -> State2;
+                        _ -> subtract_in_memory_counts(Header, State2)
+                    end,
+            Effects = dead_letter_effects(maxlen, #{none => FullMsg},
+                                          State, Effects0),
+            {State, Effects};
+        {{'$prefix_msg', Header}, State1} ->
+            State2 = subtract_in_memory_counts(Header, add_bytes_drop(Header, State1)),
+            {State2, Effects0};
+        {{'$empty_msg', Header}, State1} ->
+            State2 = add_bytes_drop(Header, State1),
+            {State2, Effects0};
+        empty ->
+            {State0, Effects0}
+    end.
+
+%% Appends a message to the queue, deciding whether the raw body is kept
+%% in memory or replaced with 'empty' (body to be fetched from the raft
+%% log on delivery) based on the in-memory limits.
+enqueue(RaftIdx, RawMsg, #?MODULE{messages = Messages,
+                                  next_msg_num = NextMsgNum} = State0) ->
+    %% the initial header is an integer only - it will get expanded to a map
+    %% when the next required key is added
+    Header = message_size(RawMsg),
+    {State1, Msg} =
+        case evaluate_memory_limit(Header, State0) of
+            true ->
+                % indexed message with header map
+                {State0, {RaftIdx, {Header, 'empty'}}};
+            false ->
+                {add_in_memory_counts(Header, State0),
+                 {RaftIdx, {Header, RawMsg}}} % indexed message with header map
+        end,
+    State = add_bytes_enqueue(Header, State1),
+    State#?MODULE{messages = lqueue:in({NextMsgNum, Msg}, Messages),
+                  next_msg_num = NextMsgNum + 1}.
+
+%% Records the raft index of a newly enqueued message and bumps the
+%% enqueue counter used for release-cursor pacing.
+append_to_master_index(RaftIdx,
+                       #?MODULE{ra_indexes = Indexes0} = State0) ->
+    State = incr_enqueue_count(State0),
+    Indexes = rabbit_fifo_index:append(RaftIdx, Indexes0),
+    State#?MODULE{ra_indexes = Indexes}.
+
+
+%% Increments the enqueue counter; resetting it to 0 once the configured
+%% release-cursor interval is reached signals (to
+%% maybe_store_dehydrated_state/2) that a cursor should be stored.
+incr_enqueue_count(#?MODULE{enqueue_count = EC,
+                            cfg = #cfg{release_cursor_interval = {_Base, C}}
+                            } = State0) when EC >= C->
+    %% this will trigger a dehydrated version of the state to be stored
+    %% at this raft index for potential future snapshot generation
+    %% Q: Why don't we just stash the release cursor here?
+    %% A: Because it needs to be the very last thing we do and we
+    %% first needs to run the checkout logic.
+    State0#?MODULE{enqueue_count = 0};
+incr_enqueue_count(#?MODULE{enqueue_count = C} = State) ->
+    State#?MODULE{enqueue_count = C + 1}.
+
+%% When the enqueue counter has just wrapped (enqueue_count == 0),
+%% stores a dehydrated copy of the state as a release cursor for future
+%% snapshotting and adapts the cursor interval to the queue's backlog.
+maybe_store_dehydrated_state(RaftIdx,
+                             #?MODULE{cfg =
+                                      #cfg{release_cursor_interval = {Base, _}}
+                                      = Cfg,
+                                      ra_indexes = Indexes,
+                                      enqueue_count = 0,
+                                      release_cursors = Cursors0} = State0) ->
+    case rabbit_fifo_index:exists(RaftIdx, Indexes) of
+        false ->
+            %% the incoming enqueue must already have been dropped
+            State0;
+        true ->
+            Interval = case Base of
+                           0 -> 0;
+                           _ ->
+                               Total = messages_total(State0),
+                               min(max(Total, Base), ?RELEASE_CURSOR_EVERY_MAX)
+                       end,
+            State = State0#?MODULE{cfg = Cfg#cfg{release_cursor_interval =
+                                                 {Base, Interval}}},
+            Dehydrated = dehydrate_state(State),
+            Cursor = {release_cursor, RaftIdx, Dehydrated},
+            Cursors = lqueue:in(Cursor, Cursors0),
+            State#?MODULE{release_cursors = Cursors}
+    end;
+maybe_store_dehydrated_state(_RaftIdx, State) ->
+    State.
+
+%% Drains an enqueuer's pending (out-of-order) messages for as long as
+%% the head of the sorted pending list matches the next expected seqno,
+%% then stores the updated enqueuer record back into the state.
+enqueue_pending(From,
+                #enqueuer{next_seqno = Next,
+                          pending = [{Next, RaftIdx, RawMsg} | Pending]} = Enq0,
+                State0) ->
+    State = enqueue(RaftIdx, RawMsg, State0),
+    Enq = Enq0#enqueuer{next_seqno = Next + 1, pending = Pending},
+    enqueue_pending(From, Enq, State);
+enqueue_pending(From, Enq, #?MODULE{enqueuers = Enqueuers0} = State) ->
+    State#?MODULE{enqueuers = Enqueuers0#{From => Enq}}.
+
+%% Enqueue with per-enqueuer sequence-number deduplication.
+%% From =:= undefined means an untracked enqueue (no dedup). Otherwise
+%% the enqueuer's next_seqno decides: expected -> enqueue (plus drain of
+%% pending), future -> buffer in sorted pending list, past -> duplicate.
+maybe_enqueue(RaftIdx, undefined, undefined, RawMsg, Effects, State0) ->
+    % direct enqueue without tracking
+    State = enqueue(RaftIdx, RawMsg, State0),
+    {ok, State, Effects};
+maybe_enqueue(RaftIdx, From, MsgSeqNo, RawMsg, Effects0,
+              #?MODULE{enqueuers = Enqueuers0} = State0) ->
+    case maps:get(From, Enqueuers0, undefined) of
+        undefined ->
+            %% first message from this enqueuer: register it, retry, and
+            %% monitor the enqueuing process
+            State1 = State0#?MODULE{enqueuers = Enqueuers0#{From => #enqueuer{}}},
+            {ok, State, Effects} = maybe_enqueue(RaftIdx, From, MsgSeqNo,
+                                                 RawMsg, Effects0, State1),
+            {ok, State, [{monitor, process, From} | Effects]};
+        #enqueuer{next_seqno = MsgSeqNo} = Enq0 ->
+            % it is the next expected seqno
+            State1 = enqueue(RaftIdx, RawMsg, State0),
+            Enq = Enq0#enqueuer{next_seqno = MsgSeqNo + 1},
+            State = enqueue_pending(From, Enq, State1),
+            {ok, State, Effects0};
+        #enqueuer{next_seqno = Next,
+                  pending = Pending0} = Enq0
+          when MsgSeqNo > Next ->
+            % out of order delivery
+            Pending = [{MsgSeqNo, RaftIdx, RawMsg} | Pending0],
+            Enq = Enq0#enqueuer{pending = lists:sort(Pending)},
+            {ok, State0#?MODULE{enqueuers = Enqueuers0#{From => Enq}}, Effects0};
+        #enqueuer{next_seqno = Next} when MsgSeqNo =< Next ->
+            % duplicate delivery - remove the raft index from the ra_indexes
+            % map as it was added earlier
+            {duplicate, State0, Effects0}
+    end.
+
+%% Second element of a tuple.
+snd(T) ->
+    element(2, T).
+
+%% Returns (requeues) a set of checked-out messages for a consumer,
+%% tops the consumer's credit back up, re-runs checkout and possibly
+%% advances the release cursor.
+return(#{index := IncomingRaftIdx} = Meta, ConsumerId, Returned,
+       Effects0, State0) ->
+    {State1, Effects1} = maps:fold(
+                           fun(MsgId, {Tag, _} = Msg, {S0, E0})
+                                 when Tag == '$prefix_msg';
+                                      Tag == '$empty_msg'->
+                                   return_one(Meta, MsgId, 0, Msg, S0, E0, ConsumerId);
+                              (MsgId, {MsgNum, Msg}, {S0, E0}) ->
+                                   return_one(Meta, MsgId, MsgNum, Msg, S0, E0,
+                                              ConsumerId)
+                           end, {State0, Effects0}, Returned),
+    State2 =
+        case State1#?MODULE.consumers of
+            #{ConsumerId := Con0} ->
+                Con = Con0#consumer{credit = increase_credit(Con0,
+                                                             map_size(Returned))},
+                update_or_remove_sub(Meta, ConsumerId, Con, State1),
+            _ ->
+                State1
+        end,
+    {State, ok, Effects} = checkout(Meta, State0, State2, Effects1),
+    update_smallest_raft_index(IncomingRaftIdx, State, Effects).
+
+%% Used to process messages that are finished: removes the discarded
+%% messages from the consumer's checked-out map and from ra_indexes,
+%% restores consumer credit and updates the settle byte counters.
+complete(Meta, ConsumerId, Discarded,
+         #consumer{checked_out = Checked} = Con0, Effects,
+         #?MODULE{ra_indexes = Indexes0} = State0) ->
+    %% TODO optimise use of Discarded map here
+    MsgRaftIdxs = [RIdx || {_, {RIdx, _}} <- maps:values(Discarded)],
+    %% credit_mode = simple_prefetch should automatically top-up credit
+    %% as messages are simple_prefetch or otherwise returned
+    Con = Con0#consumer{checked_out = maps:without(maps:keys(Discarded), Checked),
+                        credit = increase_credit(Con0, map_size(Discarded))},
+    State1 = update_or_remove_sub(Meta, ConsumerId, Con, State0),
+    Indexes = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes0,
+                          MsgRaftIdxs),
+    %% TODO: use maps:fold instead
+    State2 = lists:foldl(fun({_, {_, {Header, _}}}, Acc) ->
+                                 add_bytes_settle(Header, Acc);
+                            ({'$prefix_msg', Header}, Acc) ->
+                                 add_bytes_settle(Header, Acc);
+                            ({'$empty_msg', Header}, Acc) ->
+                                 add_bytes_settle(Header, Acc)
+                         end, State1, maps:values(Discarded)),
+    {State2#?MODULE{ra_indexes = Indexes}, Effects}.
+
+%% Computes a consumer's new credit after Credit messages were settled
+%% or returned. 'once' consumers and manually-credited consumers never
+%% gain credit automatically.
+increase_credit(#consumer{lifetime = once,
+                          credit = Credit}, _) ->
+    %% once consumers cannot increment credit
+    Credit;
+increase_credit(#consumer{lifetime = auto,
+                          credit_mode = credited,
+                          credit = Credit}, _) ->
+    %% credit_mode: credit also doesn't automatically increment credit
+    Credit;
+increase_credit(#consumer{credit = Current}, Credit) ->
+    Current + Credit.
+
+%% Settles MsgIds for a consumer then re-runs checkout and release
+%% cursor maintenance.
+complete_and_checkout(#{index := IncomingRaftIdx} = Meta, MsgIds, ConsumerId,
+                      #consumer{checked_out = Checked0} = Con0,
+                      Effects0, State0) ->
+    Discarded = maps:with(MsgIds, Checked0),
+    {State2, Effects1} = complete(Meta, ConsumerId, Discarded, Con0,
+                                  Effects0, State0),
+    {State, ok, Effects} = checkout(Meta, State0, State2, Effects1),
+    update_smallest_raft_index(IncomingRaftIdx, State, Effects).
+
+%% Builds the effects needed to dead-letter the Discarded messages via
+%% the configured {Mod, Fun, Args} handler. Message bodies that were
+%% evicted from memory ('empty') are first read back from the raft log
+%% via a {log, ...} effect; in-memory bodies are passed directly.
+dead_letter_effects(_Reason, _Discarded,
+                    #?MODULE{cfg = #cfg{dead_letter_handler = undefined}},
+                    Effects) ->
+    Effects;
+dead_letter_effects(Reason, Discarded,
+                    #?MODULE{cfg = #cfg{dead_letter_handler = {Mod, Fun, Args}}},
+                    Effects) ->
+    RaftIdxs = maps:fold(
+                 fun (_, {_, {RaftIdx, {_Header, 'empty'}}}, Acc) ->
+                         [RaftIdx | Acc];
+                     (_, _, Acc) ->
+                         Acc
+                 end, [], Discarded),
+    [{log, RaftIdxs,
+      fun (Log) ->
+              Lookup = maps:from_list(lists:zip(RaftIdxs, Log)),
+              DeadLetters = maps:fold(
+                              fun (_, {_, {RaftIdx, {_Header, 'empty'}}}, Acc) ->
+                                      %% body resolved from the log entry
+                                      {enqueue, _, _, Msg} = maps:get(RaftIdx, Lookup),
+                                      [{Reason, Msg} | Acc];
+                                  (_, {_, {_, {_Header, Msg}}}, Acc) ->
+                                      [{Reason, Msg} | Acc];
+                                  (_, _, Acc) ->
+                                      Acc
+                              end, [], Discarded),
+              [{mod_call, Mod, Fun, Args ++ [DeadLetters]}]
+      end} | Effects].
+
+%% Effect notifying rabbit_quorum_queue that a consumer was cancelled.
+cancel_consumer_effects(ConsumerId,
+                        #?MODULE{cfg = #cfg{resource = QName}}, Effects) ->
+    [{mod_call, rabbit_quorum_queue,
+      cancel_consumer_handler, [QName, ConsumerId]} | Effects].
+
+update_smallest_raft_index(Idx, State, Effects) ->
+    update_smallest_raft_index(Idx, ok, State, Effects).
+
+%% Release-cursor maintenance after a command. When the queue holds no
+%% live raft indexes at all the cursor can jump straight to the incoming
+%% index; otherwise emit any stashed cursor that is now safely below the
+%% smallest live index.
+update_smallest_raft_index(IncomingRaftIdx, Reply,
+                           #?MODULE{cfg = Cfg,
+                                    ra_indexes = Indexes,
+                                    release_cursors = Cursors0} = State0,
+                           Effects) ->
+    case rabbit_fifo_index:size(Indexes) of
+        0 ->
+            % there are no messages on queue anymore and no pending enqueues
+            % we can forward release_cursor all the way until
+            % the last received command, hooray
+            %% reset the release cursor interval
+            #cfg{release_cursor_interval = {Base, _}} = Cfg,
+            RCI = {Base, Base},
+            State = State0#?MODULE{cfg = Cfg#cfg{release_cursor_interval = RCI},
+                                   release_cursors = lqueue:new(),
+                                   enqueue_count = 0},
+            {State, Reply, Effects ++ [{release_cursor, IncomingRaftIdx, State}]};
+        _ ->
+            Smallest = rabbit_fifo_index:smallest(Indexes),
+            case find_next_cursor(Smallest, Cursors0) of
+                {empty, Cursors} ->
+                    {State0#?MODULE{release_cursors = Cursors}, Reply, Effects};
+                {Cursor, Cursors} ->
+                    %% we can emit a release cursor when we've passed the smallest
+                    %% release cursor available.
+                    {State0#?MODULE{release_cursors = Cursors}, Reply,
+                     Effects ++ [Cursor]}
+            end
+    end.
+
+find_next_cursor(Idx, Cursors) ->
+    find_next_cursor(Idx, Cursors, empty).
+
+%% Pops stashed release cursors whose raft index is below Smallest,
+%% keeping the last (largest) such cursor as the one to emit.
+find_next_cursor(Smallest, Cursors0, Potential) ->
+    case lqueue:out(Cursors0) of
+        {{value, {_, Idx, _} = Cursor}, Cursors} when Idx < Smallest ->
+            %% we found one but it may not be the largest one
+            find_next_cursor(Smallest, Cursors, Cursor);
+        _ ->
+            {Potential, Cursors0}
+    end.
+
+%% Updates Key in a message header, first expanding the compact
+%% integer-only form (just the size) into a map when needed.
+update_header(Key, UpdateFun, Default, Header)
+  when is_integer(Header) ->
+    update_header(Key, UpdateFun, Default, #{size => Header});
+update_header(Key, UpdateFun, Default, Header) ->
+    maps:update_with(Key, UpdateFun, Default, Header).
+
+
+%% Returns a single checked-out message to the returns queue,
+%% incrementing its delivery_count header. If the count exceeds the
+%% configured delivery limit the message is completed (and, for fully
+%% indexed messages, dead-lettered) instead. The first clause handles
+%% snapshot-recovered '$prefix_msg'/'$empty_msg' entries, the second
+%% regular indexed messages; both may evict the body from memory
+%% ('empty') if the in-memory limits are exceeded on re-entry.
+return_one(Meta, MsgId, 0, {Tag, Header0},
+           #?MODULE{returns = Returns,
+                    consumers = Consumers,
+                    cfg = #cfg{delivery_limit = DeliveryLimit}} = State0,
+           Effects0, ConsumerId)
+  when Tag == '$prefix_msg'; Tag == '$empty_msg' ->
+    #consumer{checked_out = Checked} = Con0 = maps:get(ConsumerId, Consumers),
+    Header = update_header(delivery_count, fun (C) -> C+1 end, 1, Header0),
+    Msg0 = {Tag, Header},
+    case maps:get(delivery_count, Header) of
+        DeliveryCount when DeliveryCount > DeliveryLimit ->
+            complete(Meta, ConsumerId, #{MsgId => Msg0}, Con0, Effects0, State0);
+        _ ->
+            %% this should not affect the release cursor in any way
+            Con = Con0#consumer{checked_out = maps:remove(MsgId, Checked)},
+            {Msg, State1} = case Tag of
+                                '$empty_msg' ->
+                                    {Msg0, State0};
+                                _ -> case evaluate_memory_limit(Header, State0) of
+                                         true ->
+                                             {{'$empty_msg', Header}, State0};
+                                         false ->
+                                             {Msg0, add_in_memory_counts(Header, State0)}
+                                     end
+                            end,
+            {add_bytes_return(
+               Header,
+               State1#?MODULE{consumers = Consumers#{ConsumerId => Con},
+                              returns = lqueue:in(Msg, Returns)}),
+             Effects0}
+    end;
+return_one(Meta, MsgId, MsgNum, {RaftId, {Header0, RawMsg}},
+           #?MODULE{returns = Returns,
+                    consumers = Consumers,
+                    cfg = #cfg{delivery_limit = DeliveryLimit}} = State0,
+           Effects0, ConsumerId) ->
+    #consumer{checked_out = Checked} = Con0 = maps:get(ConsumerId, Consumers),
+    Header = update_header(delivery_count, fun (C) -> C+1 end, 1, Header0),
+    Msg0 = {RaftId, {Header, RawMsg}},
+    case maps:get(delivery_count, Header) of
+        DeliveryCount when DeliveryCount > DeliveryLimit ->
+            DlMsg = {MsgNum, Msg0},
+            Effects = dead_letter_effects(delivery_limit, #{none => DlMsg},
+                                          State0, Effects0),
+            complete(Meta, ConsumerId, #{MsgId => DlMsg}, Con0, Effects, State0);
+        _ ->
+            Con = Con0#consumer{checked_out = maps:remove(MsgId, Checked)},
+            %% this should not affect the release cursor in any way
+            {Msg, State1} = case RawMsg of
+                                'empty' ->
+                                    {Msg0, State0};
+                                _ ->
+                                    case evaluate_memory_limit(Header, State0) of
+                                        true ->
+                                            {{RaftId, {Header, 'empty'}}, State0};
+                                        false ->
+                                            {Msg0, add_in_memory_counts(Header, State0)}
+                                    end
+                            end,
+            {add_bytes_return(
+               Header,
+               State1#?MODULE{consumers = Consumers#{ConsumerId => Con},
+                              returns = lqueue:in({MsgNum, Msg}, Returns)}),
+             Effects0}
+    end.
+
+%% Returns every checked-out message of a consumer (e.g. when its
+%% channel goes down), in checkout (MsgId) order.
+return_all(Meta, #?MODULE{consumers = Cons} = State0, Effects0, ConsumerId,
+           #consumer{checked_out = Checked0} = Con) ->
+    %% need to sort the list so that we return messages in the order
+    %% they were checked out
+    Checked = lists:sort(maps:to_list(Checked0)),
+    State = State0#?MODULE{consumers = Cons#{ConsumerId => Con}},
+    lists:foldl(fun ({MsgId, {'$prefix_msg', _} = Msg}, {S, E}) ->
+                        return_one(Meta, MsgId, 0, Msg, S, E, ConsumerId);
+                    ({MsgId, {'$empty_msg', _} = Msg}, {S, E}) ->
+                        return_one(Meta, MsgId, 0, Msg, S, E, ConsumerId);
+                    ({MsgId, {MsgNum, Msg}}, {S, E}) ->
+                        return_one(Meta, MsgId, MsgNum, Msg, S, E, ConsumerId)
+                end, {State, Effects0}, Checked).
+
+%% checkout new messages to consumers
+%% OldState is the state before the current command was applied and is
+%% used for soft-limit (reject_publish) transition detection.
+checkout(#{index := Index} = Meta, OldState, State0, Effects0) ->
+    {State1, _Result, Effects1} = checkout0(Meta, checkout_one(Meta, State0),
+                                            Effects0, {#{}, #{}}),
+    case evaluate_limit(Index, false, OldState, State1, Effects1) of
+        {State, true, Effects} ->
+            %% a message was dropped by the limiter: indexes changed, so
+            %% release-cursor maintenance is needed
+            update_smallest_raft_index(Index, State, Effects);
+        {State, false, Effects} ->
+            {State, ok, Effects}
+    end.
+
+%% Drives checkout_one/2 to a fixpoint, accumulating per-consumer
+%% deliveries: SendAcc for in-memory bodies (send_msg effects) and
+%% LogAcc for evicted bodies that must be read from the raft log
+%% (log effects). Messages are accumulated in reverse and un-reversed
+%% in the effect builders.
+checkout0(Meta, {success, ConsumerId, MsgId, {RaftIdx, {Header, 'empty'}}, State},
+          Effects, {SendAcc, LogAcc0}) ->
+    DelMsg = {RaftIdx, {MsgId, Header}},
+    LogAcc = maps:update_with(ConsumerId,
+                              fun (M) -> [DelMsg | M] end,
+                              [DelMsg], LogAcc0),
+    checkout0(Meta, checkout_one(Meta, State), Effects, {SendAcc, LogAcc});
+checkout0(Meta, {success, ConsumerId, MsgId, Msg, State}, Effects,
+          {SendAcc0, LogAcc}) ->
+    DelMsg = {MsgId, Msg},
+    SendAcc = maps:update_with(ConsumerId,
+                               fun (M) -> [DelMsg | M] end,
+                               [DelMsg], SendAcc0),
+    checkout0(Meta, checkout_one(Meta, State), Effects, {SendAcc, LogAcc});
+checkout0(_Meta, {Activity, State0}, Effects0, {SendAcc, LogAcc}) ->
+    %% no more deliveries possible: flush the accumulated effects
+    Effects1 = case Activity of
+                   nochange ->
+                       append_send_msg_effects(
+                         append_log_effects(Effects0, LogAcc), SendAcc);
+                   inactive ->
+                       [{aux, inactive}
+                        | append_send_msg_effects(
+                            append_log_effects(Effects0, LogAcc), SendAcc)]
+               end,
+    {State0, ok, lists:reverse(Effects1)}.
+
+%% Enforces max-length/max-bytes. drop_head: recursively drops head
+%% messages while over limit (Result becomes true when anything was
+%% dropped). reject_publish: when crossing above the limit, tells every
+%% unblocked enqueuer to block; when dropping back below the soft limit,
+%% tells them all to resume.
+evaluate_limit(_Index, Result, _BeforeState,
+               #?MODULE{cfg = #cfg{max_length = undefined,
+                                   max_bytes = undefined}} = State,
+               Effects) ->
+    {State, Result, Effects};
+evaluate_limit(Index, Result, BeforeState,
+               #?MODULE{cfg = #cfg{overflow_strategy = Strategy},
+                        enqueuers = Enqs0} = State0,
+               Effects0) ->
+    case is_over_limit(State0) of
+        true when Strategy == drop_head ->
+            {State, Effects} = drop_head(State0, Effects0),
+            evaluate_limit(Index, true, BeforeState, State, Effects);
+        true when Strategy == reject_publish ->
+            %% generate send_msg effect for each enqueuer to let them know
+            %% they need to block
+            {Enqs, Effects} =
+                maps:fold(
+                  fun (P, #enqueuer{blocked = undefined} = E0, {Enqs, Acc}) ->
+                          %% blocked is stamped with the raft index at
+                          %% which blocking started
+                          E = E0#enqueuer{blocked = Index},
+                          {Enqs#{P => E},
+                           [{send_msg, P, {queue_status, reject_publish},
+                             [ra_event]} | Acc]};
+                      (_P, _E, Acc) ->
+                          Acc
+                  end, {Enqs0, Effects0}, Enqs0),
+            {State0#?MODULE{enqueuers = Enqs}, Result, Effects};
+        false when Strategy == reject_publish ->
+            %% TODO: optimise as this case gets called for every command
+            %% pretty much
+            Before = is_below_soft_limit(BeforeState),
+            case {Before, is_below_soft_limit(State0)} of
+                {false, true} ->
+                    %% we have moved below the lower limit which
+                    {Enqs, Effects} =
+                        maps:fold(
+                          fun (P, #enqueuer{} = E0, {Enqs, Acc}) ->
+                                  E = E0#enqueuer{blocked = undefined},
+                                  {Enqs#{P => E},
+                                   [{send_msg, P, {queue_status, go}, [ra_event]}
+                                    | Acc]};
+                              (_P, _E, Acc) ->
+                                  Acc
+                          end, {Enqs0, Effects0}, Enqs0),
+                    {State0#?MODULE{enqueuers = Enqs}, Result, Effects};
+                _ ->
+                    {State0, Result, Effects0}
+            end;
+        false ->
+            {State0, Result, Effects0}
+    end.
+
+%% True when keeping one more message of the given size in memory would
+%% exceed the configured in-memory length/bytes limits (i.e. the body
+%% should be stored as 'empty' and fetched from the log on delivery).
+evaluate_memory_limit(_Header,
+                      #?MODULE{cfg = #cfg{max_in_memory_length = undefined,
+                                          max_in_memory_bytes = undefined}}) ->
+    false;
+evaluate_memory_limit(#{size := Size}, State) ->
+    %% header may be the expanded map form; extract the size
+    evaluate_memory_limit(Size, State);
+evaluate_memory_limit(Size,
+                      #?MODULE{cfg = #cfg{max_in_memory_length = MaxLength,
+                                          max_in_memory_bytes = MaxBytes},
+                               msg_bytes_in_memory = Bytes,
+                               msgs_ready_in_memory = Length})
+  when is_integer(Size) ->
+    (Length >= MaxLength) orelse ((Bytes + Size) > MaxBytes).
+
+%% Turns the per-consumer delivery accumulator into send_msg effects;
+%% adds an {aux, active} marker when anything was delivered.
+append_send_msg_effects(Effects, AccMap) when map_size(AccMap) == 0 ->
+    Effects;
+append_send_msg_effects(Effects0, AccMap) ->
+    Effects = maps:fold(fun (C, Msgs, Ef) ->
+                                [send_msg_effect(C, lists:reverse(Msgs)) | Ef]
+                        end, Effects0, AccMap),
+    [{aux, active} | Effects].
+
+%% Turns the per-consumer log accumulator into log-read effects.
+append_log_effects(Effects0, AccMap) ->
+    maps:fold(fun (C, Msgs, Ef) ->
+                      [send_log_effect(C, lists:reverse(Msgs)) | Ef]
+              end, Effects0, AccMap).
+
+%% next message is determined as follows:
+%% First we check if there are prefix returns
+%% Then we check if there are current returns
+%% then we check prefix msgs
+%% then we check current messages
+%%
+%% When we return it is always done to the current return queue
+%% for both prefix messages and current messages
+take_next_msg(#?MODULE{prefix_msgs = {R, P}} = State) ->
+    %% conversion
+    take_next_msg(State#?MODULE{prefix_msgs = {length(R), R, length(P), P}});
+take_next_msg(#?MODULE{prefix_msgs = {NumR, [{'$empty_msg', _} = Msg | Rem],
+                                      NumP, P}} = State) ->
+    %% there are prefix returns, these should be served first
+    {Msg, State#?MODULE{prefix_msgs = {NumR-1, Rem, NumP, P}}};
+take_next_msg(#?MODULE{prefix_msgs = {NumR, [Header | Rem], NumP, P}} = State) ->
+    %% there are prefix returns, these should be served first
+    {{'$prefix_msg', Header},
+     State#?MODULE{prefix_msgs = {NumR-1, Rem, NumP, P}}};
+take_next_msg(#?MODULE{returns = Returns,
+                       messages = Messages0,
+                       prefix_msgs = {NumR, R, NumP, P}} = State) ->
+    %% use peek rather than out there as the most likely case is an empty
+    %% queue
+    case lqueue:peek(Returns) of
+        {value, NextMsg} ->
+            {NextMsg,
+             State#?MODULE{returns = lqueue:drop(Returns)}};
+        empty when P == [] ->
+            case lqueue:out(Messages0) of
+                {empty, _} ->
+                    empty;
+                {{value, {_, _} = SeqMsg}, Messages} ->
+                    {SeqMsg, State#?MODULE{messages = Messages }}
+            end;
+        empty ->
+            [Msg | Rem] = P,
+            case Msg of
+                {Header, 'empty'} ->
+                    %% There are prefix msgs
+                    {{'$empty_msg', Header},
+                     State#?MODULE{prefix_msgs = {NumR, R, NumP-1, Rem}}};
+                Header ->
+                    {{'$prefix_msg', Header},
+                     State#?MODULE{prefix_msgs = {NumR, R, NumP-1, Rem}}}
+            end
+    end.
+
+%% Effect delivering in-memory message bodies directly to a consumer
+%% channel process.
+send_msg_effect({CTag, CPid}, Msgs) ->
+    {send_msg, CPid, {delivery, CTag, Msgs}, [local, ra_event]}.
+
+%% Effect that first reads the enqueue commands for the given raft
+%% indexes from the log (bodies were evicted from memory) and then
+%% delivers the reconstructed messages; executed local to the consumer's
+%% node.
+send_log_effect({CTag, CPid}, IdxMsgs) ->
+    {RaftIdxs, Data} = lists:unzip(IdxMsgs),
+    {log, RaftIdxs,
+     fun(Log) ->
+             Msgs = lists:zipwith(fun ({enqueue, _, _, Msg}, {MsgId, Header}) ->
+                                          {MsgId, {Header, Msg}}
+                                  end, Log, Data),
+             [{send_msg, CPid, {delivery, CTag, Msgs}, [local, ra_event]}]
+     end,
+     {local, node(CPid)}}.
+
+%% Effect replying to a dequeue request after reading the message body
+%% back from the raft log.
+reply_log_effect(RaftIdx, MsgId, Header, Ready, From) ->
+    {log, [RaftIdx],
+     fun([{enqueue, _, _, Msg}]) ->
+             [{reply, From, {wrap_reply,
+                             {dequeue, {MsgId, {Header, Msg}}, Ready}}}]
+     end}.
+
+%% Attempts to check out a single message to the next serviceable
+%% consumer from the service queue. Consumers with no credit, or that
+%% are cancelled / suspected down / no longer present, are skipped by
+%% recursing without them on the service queue. Returns
+%% {success, ConsumerId, MsgId, Msg, State} on delivery, or
+%% {nochange | inactive, State} when nothing could be checked out.
+checkout_one(Meta, #?MODULE{service_queue = SQ0,
+                            messages = Messages0,
+                            consumers = Cons0} = InitState) ->
+    case priority_queue:out(SQ0) of
+        {{value, ConsumerId}, SQ1} ->
+            case take_next_msg(InitState) of
+                {ConsumerMsg, State0} ->
+                    %% there are consumers waiting to be serviced
+                    %% process consumer checkout
+                    case maps:find(ConsumerId, Cons0) of
+                        {ok, #consumer{credit = 0}} ->
+                            %% no credit but was still on queue
+                            %% can happen when draining
+                            %% recurse without consumer on queue
+                            checkout_one(Meta, InitState#?MODULE{service_queue = SQ1});
+                        {ok, #consumer{status = cancelled}} ->
+                            checkout_one(Meta, InitState#?MODULE{service_queue = SQ1});
+                        {ok, #consumer{status = suspected_down}} ->
+                            checkout_one(Meta, InitState#?MODULE{service_queue = SQ1});
+                        {ok, #consumer{checked_out = Checked0,
+                                       next_msg_id = Next,
+                                       credit = Credit,
+                                       delivery_count = DelCnt} = Con0} ->
+                            Checked = maps:put(Next, ConsumerMsg, Checked0),
+                            Con = Con0#consumer{checked_out = Checked,
+                                                next_msg_id = Next + 1,
+                                                credit = Credit - 1,
+                                                delivery_count = DelCnt + 1},
+                            State1 = update_or_remove_sub(Meta,
+                                                          ConsumerId, Con,
+                                                          State0#?MODULE{service_queue = SQ1}),
+                            %% adjust in-memory/checkout byte counters
+                            %% depending on whether the body is held in
+                            %% memory or must come from the log
+                            {State, Msg} =
+                                case ConsumerMsg of
+                                    {'$prefix_msg', Header} ->
+                                        {subtract_in_memory_counts(
+                                           Header, add_bytes_checkout(Header, State1)),
+                                         ConsumerMsg};
+                                    {'$empty_msg', Header} ->
+                                        {add_bytes_checkout(Header, State1),
+                                         ConsumerMsg};
+                                    {_, {_, {Header, 'empty'}} = M} ->
+                                        {add_bytes_checkout(Header, State1),
+                                         M};
+                                    {_, {_, {Header, _} = M}} ->
+                                        {subtract_in_memory_counts(
+                                           Header,
+                                           add_bytes_checkout(Header, State1)),
+                                         M}
+                                end,
+                            {success, ConsumerId, Next, Msg, State};
+                        error ->
+                            %% consumer did not exist but was queued, recurse
+                            checkout_one(Meta, InitState#?MODULE{service_queue = SQ1})
+                    end;
+                empty ->
+                    {nochange, InitState}
+            end;
+        {empty, _} ->
+            %% no consumer to service; 'inactive' when messages are
+            %% piling up with no one to deliver them to
+            case lqueue:len(Messages0) of
+                0 -> {nochange, InitState};
+                _ -> {inactive, InitState}
+            end
+    end.
+
+%% Stores an updated consumer back into the state and decides whether it
+%% should (re-)enter the service queue: 'auto' consumers with credit are
+%% queued; exhausted 'once' consumers with nothing checked out are
+%% removed entirely.
+update_or_remove_sub(_Meta, ConsumerId, #consumer{lifetime = auto,
+                                                  credit = 0} = Con,
+                     #?MODULE{consumers = Cons} = State) ->
+    State#?MODULE{consumers = maps:put(ConsumerId, Con, Cons)};
+update_or_remove_sub(_Meta, ConsumerId, #consumer{lifetime = auto} = Con,
+                     #?MODULE{consumers = Cons,
+                              service_queue = ServiceQueue} = State) ->
+    State#?MODULE{consumers = maps:put(ConsumerId, Con, Cons),
+                  service_queue = uniq_queue_in(ConsumerId, Con, ServiceQueue)};
+update_or_remove_sub(#{system_time := Ts},
+                     ConsumerId, #consumer{lifetime = once,
+                                           checked_out = Checked,
+                                           credit = 0} = Con,
+                     #?MODULE{consumers = Cons} = State) ->
+    case maps:size(Checked) of
+        0 ->
+            % we're done with this consumer
+            State#?MODULE{consumers = maps:remove(ConsumerId, Cons),
+                          last_active = Ts};
+        _ ->
+            % there are unsettled items so need to keep around
+            State#?MODULE{consumers = maps:put(ConsumerId, Con, Cons)}
+    end;
+update_or_remove_sub(_Meta, ConsumerId, #consumer{lifetime = once} = Con,
+                     #?MODULE{consumers = Cons,
+                              service_queue = ServiceQueue} = State) ->
+    State#?MODULE{consumers = maps:put(ConsumerId, Con, Cons),
+                  service_queue = uniq_queue_in(ConsumerId, Con, ServiceQueue)}.
+
+%% Adds a consumer key to the service queue at its priority unless it is
+%% already queued.
+uniq_queue_in(Key, #consumer{priority = P}, Queue) ->
+    % TODO: queue:member could surely be quite expensive, however the practical
+    % number of unique consumers may not be large enough for it to matter
+    case priority_queue:member(Key, Queue) of
+        true ->
+            Queue;
+        false ->
+            priority_queue:in(Key, P, Queue)
+    end.
+
+%% Adds or updates a consumer subscription. Under the competing
+%% strategy (or single_active with no consumer yet) the consumer goes
+%% straight into the consumers map; under single_active with an existing
+%% active consumer it is appended to the waiting list instead.
+update_consumer(ConsumerId, Meta, Spec, Priority,
+                #?MODULE{cfg = #cfg{consumer_strategy = competing}} = State0) ->
+    %% general case, single active consumer off
+    update_consumer0(ConsumerId, Meta, Spec, Priority, State0);
+update_consumer(ConsumerId, Meta, Spec, Priority,
+                #?MODULE{consumers = Cons0,
+                         cfg = #cfg{consumer_strategy = single_active}} = State0)
+  when map_size(Cons0) == 0 ->
+    %% single active consumer on, no one is consuming yet
+    update_consumer0(ConsumerId, Meta, Spec, Priority, State0);
+update_consumer(ConsumerId, Meta, {Life, Credit, Mode}, Priority,
+                #?MODULE{cfg = #cfg{consumer_strategy = single_active},
+                         waiting_consumers = WaitingConsumers0} = State0) ->
+    %% single active consumer on and one active consumer already
+    %% adding the new consumer to the waiting list
+    Consumer = #consumer{lifetime = Life, meta = Meta,
+                         priority = Priority,
+                         credit = Credit, credit_mode = Mode},
+    WaitingConsumers1 = WaitingConsumers0 ++ [{ConsumerId, Consumer}],
+    State0#?MODULE{waiting_consumers = WaitingConsumers1}.
+
+%% Inserts or refreshes the consumer record in the consumers map. For an
+%% existing consumer the new credit grant is reduced by the number of
+%% messages currently in flight; the consumer is then (re-)queued for
+%% service if it has credit.
+update_consumer0(ConsumerId, Meta, {Life, Credit, Mode}, Priority,
+                 #?MODULE{consumers = Cons0,
+                          service_queue = ServiceQueue0} = State0) ->
+    %% TODO: this logic may not be correct for updating a pre-existing consumer
+    Init = #consumer{lifetime = Life, meta = Meta,
+                     priority = Priority,
+                     credit = Credit, credit_mode = Mode},
+    Cons = maps:update_with(ConsumerId,
+                            fun(S) ->
+                                    %% remove any in-flight messages from
+                                    %% the credit update
+                                    N = maps:size(S#consumer.checked_out),
+                                    C = max(0, Credit - N),
+                                    S#consumer{lifetime = Life, credit = C}
+                            end, Init, Cons0),
+    ServiceQueue = maybe_queue_consumer(ConsumerId, maps:get(ConsumerId, Cons),
+                                        ServiceQueue0),
+    State0#?MODULE{consumers = Cons, service_queue = ServiceQueue}.
+
+%% Queues the consumer for service only when it has positive credit.
+maybe_queue_consumer(ConsumerId, #consumer{credit = Credit} = Con,
+                     ServiceQueue0) ->
+    case Credit > 0 of
+        true ->
+            % consumer needs service - check if already on service queue
+            uniq_queue_in(ConsumerId, Con, ServiceQueue0);
+        false ->
+            ServiceQueue0
+    end.
+
+%% creates a dehydrated version of the current state to be cached and
+%% potentially used for a snapshot at a later point: message bodies are
+%% discarded, keeping only headers (as prefix_msgs) plus counts, so the
+%% state embedded in a release cursor stays small.
+dehydrate_state(#?MODULE{messages = Messages,
+                         consumers = Consumers,
+                         returns = Returns,
+                         prefix_msgs = {PRCnt, PrefRet0, PPCnt, PrefMsg0},
+                         waiting_consumers = Waiting0} = State) ->
+    RCnt = lqueue:len(Returns),
+    %% TODO: optimise this function as far as possible
+    PrefRet1 = lists:foldr(fun ({'$prefix_msg', Header}, Acc) ->
+                                   [Header | Acc];
+                               ({'$empty_msg', _} = Msg, Acc) ->
+                                   [Msg | Acc];
+                               ({_, {_, {Header, 'empty'}}}, Acc) ->
+                                   [{'$empty_msg', Header} | Acc];
+                               ({_, {_, {Header, _}}}, Acc) ->
+                                   [Header | Acc]
+                           end,
+                           [],
+                           lqueue:to_list(Returns)),
+    PrefRet = PrefRet0 ++ PrefRet1,
+    PrefMsgsSuff = dehydrate_messages(Messages, []),
+    %% prefix messages are not populated in normal operation only after
+    %% recovering from a snapshot
+    PrefMsgs = PrefMsg0 ++ PrefMsgsSuff,
+    Waiting = [{Cid, dehydrate_consumer(C)} || {Cid, C} <- Waiting0],
+    State#?MODULE{messages = lqueue:new(),
+                  ra_indexes = rabbit_fifo_index:empty(),
+                  release_cursors = lqueue:new(),
+                  consumers = maps:map(fun (_, C) ->
+                                               dehydrate_consumer(C)
+                                       end, Consumers),
+                  returns = lqueue:new(),
+                  prefix_msgs = {PRCnt + RCnt, PrefRet,
+                                 PPCnt + lqueue:len(Messages), PrefMsgs},
+                  waiting_consumers = Waiting}.
+
+%% Strips queued messages down to bare headers ('empty' bodies keep the
+%% {Header, 'empty'} marker), preserving queue order.
+%% TODO make body recursive to avoid allocating lists:reverse call
+dehydrate_messages(Msgs0, Acc0) ->
+    {OutRes, Msgs} = lqueue:out(Msgs0),
+    case OutRes of
+        {value, {_MsgId, {_RaftId, {_, 'empty'} = Msg}}} ->
+            dehydrate_messages(Msgs, [Msg | Acc0]);
+        {value, {_MsgId, {_RaftId, {Header, _}}}} ->
+            dehydrate_messages(Msgs, [Header | Acc0]);
+        empty ->
+            lists:reverse(Acc0)
+    end.
+
+%% Strips a consumer's checked-out messages down to
+%% '$prefix_msg'/'$empty_msg' header-only entries.
+dehydrate_consumer(#consumer{checked_out = Checked0} = Con) ->
+    Checked = maps:map(fun (_, {'$prefix_msg', _} = M) ->
+                               M;
+                           (_, {'$empty_msg', _} = M) ->
+                               M;
+                           (_, {_, {_, {Header, 'empty'}}}) ->
+                               {'$empty_msg', Header};
+                           (_, {_, {_, {Header, _}}}) ->
+                               {'$prefix_msg', Header}
+                       end, Checked0),
+    Con#consumer{checked_out = Checked}.
+
+%% make the state suitable for equality comparison
+%% (lqueues are rebuilt so structurally different but equal queues
+%% compare equal)
+normalize(#?MODULE{messages = Messages,
+                   release_cursors = Cursors} = State) ->
+    State#?MODULE{messages = lqueue:from_list(lqueue:to_list(Messages)),
+                  release_cursors = lqueue:from_list(lqueue:to_list(Cursors))}.
+
+%% True when the queue exceeds its configured max length or max bytes.
+is_over_limit(#?MODULE{cfg = #cfg{max_length = undefined,
+                                  max_bytes = undefined}}) ->
+    false;
+is_over_limit(#?MODULE{cfg = #cfg{max_length = MaxLength,
+                                  max_bytes = MaxBytes},
+                       msg_bytes_enqueue = BytesEnq} = State) ->
+    messages_ready(State) > MaxLength orelse (BytesEnq > MaxBytes).
+
+%% True when the queue is comfortably below its limits (used to decide
+%% when blocked reject_publish enqueuers may resume). Returns false when
+%% no limits are configured, but evaluate_limit/5 never reaches this
+%% clause in that case (its first clause short-circuits).
+is_below_soft_limit(#?MODULE{cfg = #cfg{max_length = undefined,
+                                        max_bytes = undefined}}) ->
+    false;
+is_below_soft_limit(#?MODULE{cfg = #cfg{max_length = MaxLength,
+                                        max_bytes = MaxBytes},
+                             msg_bytes_enqueue = BytesEnq} = State) ->
+    is_below(MaxLength, messages_ready(State)) andalso
+        is_below(MaxBytes, BytesEnq).
+
+is_below(undefined, _Num) ->
+ true;
+is_below(Val, Num) when is_integer(Val) andalso is_integer(Num) ->
+ Num =< trunc(Val * ?LOW_LIMIT).
+
+-spec make_enqueue(option(pid()), option(msg_seqno()), raw_msg()) -> protocol().
+make_enqueue(Pid, Seq, Msg) ->
+ #enqueue{pid = Pid, seq = Seq, msg = Msg}.
+
+-spec make_register_enqueuer(pid()) -> protocol().
+make_register_enqueuer(Pid) ->
+ #register_enqueuer{pid = Pid}.
+
+-spec make_checkout(consumer_id(),
+ checkout_spec(), consumer_meta()) -> protocol().
+make_checkout(ConsumerId, Spec, Meta) ->
+ #checkout{consumer_id = ConsumerId,
+ spec = Spec, meta = Meta}.
+
+-spec make_settle(consumer_id(), [msg_id()]) -> protocol().
+make_settle(ConsumerId, MsgIds) when is_list(MsgIds) ->
+ #settle{consumer_id = ConsumerId, msg_ids = MsgIds}.
+
+-spec make_return(consumer_id(), [msg_id()]) -> protocol().
+make_return(ConsumerId, MsgIds) ->
+ #return{consumer_id = ConsumerId, msg_ids = MsgIds}.
+
+-spec make_discard(consumer_id(), [msg_id()]) -> protocol().
+make_discard(ConsumerId, MsgIds) ->
+ #discard{consumer_id = ConsumerId, msg_ids = MsgIds}.
+
+-spec make_credit(consumer_id(), non_neg_integer(), non_neg_integer(),
+ boolean()) -> protocol().
+make_credit(ConsumerId, Credit, DeliveryCount, Drain) ->
+ #credit{consumer_id = ConsumerId,
+ credit = Credit,
+ delivery_count = DeliveryCount,
+ drain = Drain}.
+
+-spec make_purge() -> protocol().
+make_purge() -> #purge{}.
+
+-spec make_garbage_collection() -> protocol().
+make_garbage_collection() -> #garbage_collection{}.
+
+-spec make_purge_nodes([node()]) -> protocol().
+make_purge_nodes(Nodes) ->
+ #purge_nodes{nodes = Nodes}.
+
+-spec make_update_config(config()) -> protocol().
+make_update_config(Config) ->
+ #update_config{config = Config}.
+
+add_bytes_enqueue(Bytes,
+ #?MODULE{msg_bytes_enqueue = Enqueue} = State)
+ when is_integer(Bytes) ->
+ State#?MODULE{msg_bytes_enqueue = Enqueue + Bytes};
+add_bytes_enqueue(#{size := Bytes}, State) ->
+ add_bytes_enqueue(Bytes, State).
+
+add_bytes_drop(Bytes,
+ #?MODULE{msg_bytes_enqueue = Enqueue} = State)
+ when is_integer(Bytes) ->
+ State#?MODULE{msg_bytes_enqueue = Enqueue - Bytes};
+add_bytes_drop(#{size := Bytes}, State) ->
+ add_bytes_drop(Bytes, State).
+
+add_bytes_checkout(Bytes,
+ #?MODULE{msg_bytes_checkout = Checkout,
+ msg_bytes_enqueue = Enqueue } = State)
+ when is_integer(Bytes) ->
+ State#?MODULE{msg_bytes_checkout = Checkout + Bytes,
+ msg_bytes_enqueue = Enqueue - Bytes};
+add_bytes_checkout(#{size := Bytes}, State) ->
+ add_bytes_checkout(Bytes, State).
+
+add_bytes_settle(Bytes,
+ #?MODULE{msg_bytes_checkout = Checkout} = State)
+ when is_integer(Bytes) ->
+ State#?MODULE{msg_bytes_checkout = Checkout - Bytes};
+add_bytes_settle(#{size := Bytes}, State) ->
+ add_bytes_settle(Bytes, State).
+
+add_bytes_return(Bytes,
+ #?MODULE{msg_bytes_checkout = Checkout,
+ msg_bytes_enqueue = Enqueue} = State)
+ when is_integer(Bytes) ->
+ State#?MODULE{msg_bytes_checkout = Checkout - Bytes,
+ msg_bytes_enqueue = Enqueue + Bytes};
+add_bytes_return(#{size := Bytes}, State) ->
+ add_bytes_return(Bytes, State).
+
+add_in_memory_counts(Bytes,
+ #?MODULE{msg_bytes_in_memory = InMemoryBytes,
+ msgs_ready_in_memory = InMemoryCount} = State)
+ when is_integer(Bytes) ->
+ State#?MODULE{msg_bytes_in_memory = InMemoryBytes + Bytes,
+ msgs_ready_in_memory = InMemoryCount + 1};
+add_in_memory_counts(#{size := Bytes}, State) ->
+ add_in_memory_counts(Bytes, State).
+
+subtract_in_memory_counts(Bytes,
+ #?MODULE{msg_bytes_in_memory = InMemoryBytes,
+ msgs_ready_in_memory = InMemoryCount} = State)
+ when is_integer(Bytes) ->
+ State#?MODULE{msg_bytes_in_memory = InMemoryBytes - Bytes,
+ msgs_ready_in_memory = InMemoryCount - 1};
+subtract_in_memory_counts(#{size := Bytes}, State) ->
+ subtract_in_memory_counts(Bytes, State).
+
+message_size(#basic_message{content = Content}) ->
+ #content{payload_fragments_rev = PFR} = Content,
+ iolist_size(PFR);
+message_size({'$prefix_msg', H}) ->
+ get_size_from_header(H);
+message_size({'$empty_msg', H}) ->
+ get_size_from_header(H);
+message_size(B) when is_binary(B) ->
+ byte_size(B);
+message_size(Msg) ->
+ %% probably only hit this for testing so ok to use erts_debug
+ erts_debug:size(Msg).
+
+get_size_from_header(Size) when is_integer(Size) ->
+ Size;
+get_size_from_header(#{size := B}) ->
+ B.
+
+
+all_nodes(#?MODULE{consumers = Cons0,
+ enqueuers = Enqs0,
+ waiting_consumers = WaitingConsumers0}) ->
+ Nodes0 = maps:fold(fun({_, P}, _, Acc) ->
+ Acc#{node(P) => ok}
+ end, #{}, Cons0),
+ Nodes1 = maps:fold(fun(P, _, Acc) ->
+ Acc#{node(P) => ok}
+ end, Nodes0, Enqs0),
+ maps:keys(
+ lists:foldl(fun({{_, P}, _}, Acc) ->
+ Acc#{node(P) => ok}
+ end, Nodes1, WaitingConsumers0)).
+
+all_pids_for(Node, #?MODULE{consumers = Cons0,
+ enqueuers = Enqs0,
+ waiting_consumers = WaitingConsumers0}) ->
+ Cons = maps:fold(fun({_, P}, _, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, _, Acc) -> Acc
+ end, [], Cons0),
+ Enqs = maps:fold(fun(P, _, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, _, Acc) -> Acc
+ end, Cons, Enqs0),
+ lists:foldl(fun({{_, P}, _}, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, Acc) -> Acc
+ end, Enqs, WaitingConsumers0).
+
+suspected_pids_for(Node, #?MODULE{consumers = Cons0,
+ enqueuers = Enqs0,
+ waiting_consumers = WaitingConsumers0}) ->
+ Cons = maps:fold(fun({_, P}, #consumer{status = suspected_down}, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, _, Acc) -> Acc
+ end, [], Cons0),
+ Enqs = maps:fold(fun(P, #enqueuer{status = suspected_down}, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, _, Acc) -> Acc
+ end, Cons, Enqs0),
+ lists:foldl(fun({{_, P},
+ #consumer{status = suspected_down}}, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, Acc) -> Acc
+ end, Enqs, WaitingConsumers0).
+
+is_expired(Ts, #?MODULE{cfg = #cfg{expires = Expires},
+ last_active = LastActive,
+ consumers = Consumers})
+ when is_number(LastActive) andalso is_number(Expires) ->
+ %% TODO: should it be active consumers?
+ Active = maps:filter(fun (_, #consumer{status = suspected_down}) ->
+ false;
+ (_, _) ->
+ true
+ end, Consumers),
+
+ Ts > (LastActive + Expires) andalso maps:size(Active) == 0;
+is_expired(_Ts, _State) ->
+ false.
+
+get_priority_from_args(#{args := Args}) ->
+ case rabbit_misc:table_lookup(Args, <<"x-priority">>) of
+ {_Key, Value} ->
+ Value;
+ _ -> 0
+ end;
+get_priority_from_args(_) ->
+ 0.
diff --git a/deps/rabbit/src/rabbit_fifo.hrl b/deps/rabbit/src/rabbit_fifo.hrl
new file mode 100644
index 0000000000..a63483becd
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fifo.hrl
@@ -0,0 +1,210 @@
+
+-type option(T) :: undefined | T.
+
+-type raw_msg() :: term().
+%% The raw message. It is opaque to rabbit_fifo.
+
+-type msg_in_id() :: non_neg_integer().
+% a queue scoped monotonically incrementing integer used to enforce order
+% in the unassigned messages map
+
+-type msg_id() :: non_neg_integer().
+%% A consumer-scoped monotonically incrementing integer included with a
+%% {@link delivery/0.}. Used to settle deliveries using
+%% {@link rabbit_fifo_client:settle/3.}
+
+-type msg_seqno() :: non_neg_integer().
+%% A sender process scoped monotonically incrementing integer included
+%% in enqueue messages. Used to ensure ordering of messages sent from the
+%% same process
+
+-type msg_header() :: msg_size() |
+ #{size := msg_size(),
+ delivery_count => non_neg_integer()}.
+%% The message header:
+%% delivery_count: the number of unsuccessful delivery attempts.
+%% A non-zero value indicates a previous attempt.
+%% If it only contains the size it can be condensed to an integer only
+
+-type msg() :: {msg_header(), raw_msg()}.
+%% message with a header map.
+
+-type msg_size() :: non_neg_integer().
+%% the size in bytes of the msg payload
+
+-type indexed_msg() :: {ra:index(), msg()}.
+
+-type prefix_msg() :: {'$prefix_msg', msg_header()}.
+
+-type delivery_msg() :: {msg_id(), msg()}.
+%% A tuple consisting of the message id and the headered message.
+
+-type consumer_tag() :: binary().
+%% An arbitrary binary tag used to distinguish between different consumers
+%% set up by the same process. See: {@link rabbit_fifo_client:checkout/3.}
+
+-type delivery() :: {delivery, consumer_tag(), [delivery_msg()]}.
+%% Represents the delivery of one or more rabbit_fifo messages.
+
+-type consumer_id() :: {consumer_tag(), pid()}.
+%% The entity that receives messages. Uniquely identifies a consumer.
+
+-type credit_mode() :: simple_prefetch | credited.
+%% determines how credit is replenished
+
+-type checkout_spec() :: {once | auto, Num :: non_neg_integer(),
+ credit_mode()} |
+ {dequeue, settled | unsettled} |
+ cancel.
+
+-type consumer_meta() :: #{ack => boolean(),
+ username => binary(),
+ prefetch => non_neg_integer(),
+ args => list()}.
+%% static meta data associated with a consumer
+
+
+-type applied_mfa() :: {module(), atom(), list()}.
+% represents a partially applied module call
+
+-define(RELEASE_CURSOR_EVERY, 2048).
+-define(RELEASE_CURSOR_EVERY_MAX, 3200000).
+-define(USE_AVG_HALF_LIFE, 10000.0).
+%% an average QQ without any message uses about 100KB so setting this limit
+%% to ~10 times that should be relatively safe.
+-define(GC_MEM_LIMIT_B, 2000000).
+
+-define(MB, 1048576).
+-define(LOW_LIMIT, 0.8).
+
+-record(consumer,
+ {meta = #{} :: consumer_meta(),
+ checked_out = #{} :: #{msg_id() => {msg_in_id(), indexed_msg()}},
+ next_msg_id = 0 :: msg_id(), % part of snapshot data
+ %% max number of messages that can be sent
+ %% decremented for each delivery
+ credit = 0 :: non_neg_integer(),
+ %% total number of checked out messages - ever
+ %% incremented for each delivery
+ delivery_count = 0 :: non_neg_integer(),
+ %% the mode of how credit is incremented
+ %% simple_prefetch: credit is re-filled as deliveries are settled
+ %% or returned.
+ %% credited: credit can only be changed by receiving a consumer_credit
+ %% command: `{consumer_credit, ReceiverDeliveryCount, Credit}'
+ credit_mode = simple_prefetch :: credit_mode(), % part of snapshot data
+ lifetime = once :: once | auto,
+ status = up :: up | suspected_down | cancelled,
+ priority = 0 :: non_neg_integer()
+ }).
+
+-type consumer() :: #consumer{}.
+
+-type consumer_strategy() :: competing | single_active.
+
+-type milliseconds() :: non_neg_integer().
+
+-record(enqueuer,
+ {next_seqno = 1 :: msg_seqno(),
+ % out of order enqueues - sorted list
+ pending = [] :: [{msg_seqno(), ra:index(), raw_msg()}],
+ status = up :: up |
+ suspected_down,
+ %% it is useful to have a record of when this was blocked
+ %% so that we can retry sending the block effect if
+ %% the publisher did not receive the initial one
+ blocked :: undefined | ra:index(),
+ unused_1,
+ unused_2
+ }).
+
+-record(cfg,
+ {name :: atom(),
+ resource :: rabbit_types:r('queue'),
+ release_cursor_interval :: option({non_neg_integer(), non_neg_integer()}),
+ dead_letter_handler :: option(applied_mfa()),
+ become_leader_handler :: option(applied_mfa()),
+ overflow_strategy = drop_head :: drop_head | reject_publish,
+ max_length :: option(non_neg_integer()),
+ max_bytes :: option(non_neg_integer()),
+ %% whether single active consumer is on or not for this queue
+ consumer_strategy = competing :: consumer_strategy(),
+ %% the maximum number of unsuccessful delivery attempts permitted
+ delivery_limit :: option(non_neg_integer()),
+ max_in_memory_length :: option(non_neg_integer()),
+ max_in_memory_bytes :: option(non_neg_integer()),
+ expires :: undefined | milliseconds(),
+ unused_1,
+ unused_2
+ }).
+
+-type prefix_msgs() :: {list(), list()} |
+ {non_neg_integer(), list(),
+ non_neg_integer(), list()}.
+
+-record(rabbit_fifo,
+ {cfg :: #cfg{},
+ % unassigned messages
+ messages = lqueue:new() :: lqueue:lqueue({msg_in_id(), indexed_msg()}),
+ % defines the next message id
+ next_msg_num = 1 :: msg_in_id(),
+ % queue of returned msg_in_ids - when checking out it picks from
+ returns = lqueue:new() :: lqueue:lqueue(prefix_msg() |
+ {msg_in_id(), indexed_msg()}),
+ % a counter of enqueues - used to trigger shadow copy points
+ enqueue_count = 0 :: non_neg_integer(),
+ % a map containing all the live processes that have ever enqueued
+ % a message to this queue as well as a cached value of the smallest
+ % ra_index of all pending enqueues
+ enqueuers = #{} :: #{pid() => #enqueuer{}},
+ % master index of all enqueue raft indexes including pending
+ % enqueues
+ % rabbit_fifo_index can be slow when calculating the smallest
+ % index when there are large gaps but should be faster than gb_trees
+ % for normal appending operations as it's backed by a map
+ ra_indexes = rabbit_fifo_index:empty() :: rabbit_fifo_index:state(),
+ release_cursors = lqueue:new() :: lqueue:lqueue({release_cursor,
+ ra:index(), #rabbit_fifo{}}),
+ % consumers need to reflect consumer state at time of snapshot
+ % needs to be part of snapshot
+ consumers = #{} :: #{consumer_id() => #consumer{}},
+ % consumers that require further service are queued here
+ % needs to be part of snapshot
+ service_queue = priority_queue:new() :: priority_queue:q(),
+ %% This is a special field that is only used for snapshots
+ %% It represents the queued messages at the time the
+ %% dehydrated snapshot state was cached.
+ %% As release_cursors are only emitted for raft indexes where all
+ %% prior messages no longer contribute to the current state we can
+ %% replace all message payloads with their sizes (to be used for
+ %% overflow calculations).
+ %% This is done so that consumers are still served in a deterministic
+ %% order on recovery.
+ prefix_msgs = {0, [], 0, []} :: prefix_msgs(),
+ msg_bytes_enqueue = 0 :: non_neg_integer(),
+ msg_bytes_checkout = 0 :: non_neg_integer(),
+ %% waiting consumers, one is picked if the active consumer is cancelled or dies
+ %% used only when single active consumer is on
+ waiting_consumers = [] :: [{consumer_id(), consumer()}],
+ msg_bytes_in_memory = 0 :: non_neg_integer(),
+ msgs_ready_in_memory = 0 :: non_neg_integer(),
+ last_active :: undefined | non_neg_integer(),
+ unused_1,
+ unused_2
+ }).
+
+-type config() :: #{name := atom(),
+ queue_resource := rabbit_types:r('queue'),
+ dead_letter_handler => applied_mfa(),
+ become_leader_handler => applied_mfa(),
+ release_cursor_interval => non_neg_integer(),
+ max_length => non_neg_integer(),
+ max_bytes => non_neg_integer(),
+ max_in_memory_length => non_neg_integer(),
+ max_in_memory_bytes => non_neg_integer(),
+ overflow_strategy => drop_head | reject_publish,
+ single_active_consumer_on => boolean(),
+ delivery_limit => non_neg_integer(),
+ expires => non_neg_integer(),
+ created => non_neg_integer()
+ }.
diff --git a/deps/rabbit/src/rabbit_fifo_client.erl b/deps/rabbit/src/rabbit_fifo_client.erl
new file mode 100644
index 0000000000..3990222b15
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fifo_client.erl
@@ -0,0 +1,888 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% @doc Provides an easy to consume API for interacting with the {@link rabbit_fifo.}
+%% state machine implementation running inside a `ra' raft system.
+%%
+%% Handles command tracking and other non-functional concerns.
+-module(rabbit_fifo_client).
+
+-export([
+ init/2,
+ init/3,
+ init/5,
+ checkout/5,
+ cancel_checkout/2,
+ enqueue/2,
+ enqueue/3,
+ dequeue/3,
+ settle/3,
+ return/3,
+ discard/3,
+ credit/4,
+ handle_ra_event/3,
+ untracked_enqueue/2,
+ purge/1,
+ cluster_name/1,
+ update_machine_state/2,
+ pending_size/1,
+ stat/1,
+ stat/2
+ ]).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-define(SOFT_LIMIT, 32).
+-define(TIMER_TIME, 10000).
+
+-type seq() :: non_neg_integer().
+%% last_applied is initialised to -1
+-type maybe_seq() :: integer().
+-type action() :: {send_credit_reply, Available :: non_neg_integer()} |
+ {send_drained, CTagCredit ::
+ {rabbit_fifo:consumer_tag(), non_neg_integer()}}.
+-type actions() :: [action()].
+
+-type cluster_name() :: rabbit_types:r(queue).
+
+-record(consumer, {last_msg_id :: seq() | -1,
+ ack = false :: boolean(),
+ delivery_count = 0 :: non_neg_integer()}).
+
+-record(cfg, {cluster_name :: cluster_name(),
+ servers = [] :: [ra:server_id()],
+ soft_limit = ?SOFT_LIMIT :: non_neg_integer(),
+ block_handler = fun() -> ok end :: fun(() -> term()),
+ unblock_handler = fun() -> ok end :: fun(() -> ok),
+ timeout :: non_neg_integer(),
+ version = 0 :: non_neg_integer()}).
+
+-record(state, {cfg :: #cfg{},
+ leader :: undefined | ra:server_id(),
+ queue_status :: undefined | go | reject_publish,
+ next_seq = 0 :: seq(),
+ %% Last applied is initialise to -1 to note that no command has yet been
+ %% applied, but allowing to resend messages if the first ones on the sequence
+ %% are lost (messages are sent from last_applied + 1)
+ last_applied = -1 :: maybe_seq(),
+ next_enqueue_seq = 1 :: seq(),
+ %% indicates that we've exceeded the soft limit
+ slow = false :: boolean(),
+ unsent_commands = #{} :: #{rabbit_fifo:consumer_id() =>
+ {[seq()], [seq()], [seq()]}},
+ pending = #{} :: #{seq() =>
+ {term(), rabbit_fifo:command()}},
+ consumer_deliveries = #{} :: #{rabbit_fifo:consumer_tag() =>
+ #consumer{}},
+ timer_state :: term()
+ }).
+
+-opaque state() :: #state{}.
+
+-export_type([
+ state/0,
+ actions/0
+ ]).
+
+
+%% @doc Create the initial state for a new rabbit_fifo session. A state is needed
+%% to interact with a rabbit_fifo queue using @module.
+%% @param ClusterName the id of the cluster to interact with
+%% @param Servers The known servers of the queue. If the current leader is known
+%% ensure the leader node is at the head of the list.
+-spec init(cluster_name(), [ra:server_id()]) -> state().
+init(ClusterName, Servers) ->
+ init(ClusterName, Servers, ?SOFT_LIMIT).
+
+%% @doc Create the initial state for a new rabbit_fifo session. A state is needed
+%% to interact with a rabbit_fifo queue using @module.
+%% @param ClusterName the id of the cluster to interact with
+%% @param Servers The known servers of the queue. If the current leader is known
+%% ensure the leader node is at the head of the list.
+%% @param SoftLimit the soft limit on the number of pending commands.
+-spec init(cluster_name(), [ra:server_id()], non_neg_integer()) -> state().
+init(ClusterName = #resource{}, Servers, SoftLimit) ->
+ Timeout = application:get_env(kernel, net_ticktime, 60) + 5,
+ #state{cfg = #cfg{cluster_name = ClusterName,
+ servers = Servers,
+ soft_limit = SoftLimit,
+ timeout = Timeout * 1000}}.
+
+-spec init(cluster_name(), [ra:server_id()], non_neg_integer(), fun(() -> ok),
+ fun(() -> ok)) -> state().
+init(ClusterName = #resource{}, Servers, SoftLimit, BlockFun, UnblockFun) ->
+ %% net ticktime is in seconds
+ Timeout = application:get_env(kernel, net_ticktime, 60) + 5,
+ #state{cfg = #cfg{cluster_name = ClusterName,
+ servers = Servers,
+ block_handler = BlockFun,
+ unblock_handler = UnblockFun,
+ soft_limit = SoftLimit,
+ timeout = Timeout * 1000}}.
+
+
+%% @doc Enqueues a message.
+%% @param Correlation an arbitrary erlang term used to correlate this
+%% command when it has been applied.
+%% @param Msg an arbitrary erlang term representing the message.
+%% @param State the current {@module} state.
+%% @returns
+%% `{ok | slow, State}' if the command was successfully sent. If the return
+%% tag is `slow' it means the limit is approaching and it is time to slow down
+%% the sending rate.
+%% {@module} assigns a sequence number to every raft command it issues. The
+%% SequenceNumber can be correlated to the applied sequence numbers returned
+%% by the {@link handle_ra_event/2. handle_ra_event/2} function.
+-spec enqueue(Correlation :: term(), Msg :: term(), State :: state()) ->
+ {ok | slow | reject_publish, state()}.
+enqueue(Correlation, Msg,
+ #state{queue_status = undefined,
+ next_enqueue_seq = 1,
+ cfg = #cfg{timeout = Timeout}} = State0) ->
+ %% it is the first enqueue, check the version
+ {_, Node} = Server = pick_server(State0),
+ case rpc:call(Node, ra_machine, version, [{machine, rabbit_fifo, #{}}]) of
+ 0 ->
+ %% the leader is running the old version
+ %% so we can't initialize the enqueuer session safely
+ %% fall back on old behaviour
+ enqueue(Correlation, Msg, State0#state{queue_status = go});
+ 1 ->
+ %% we're running the new version on the leader so do sync initialisation
+ %% of enqueuer session
+ Reg = rabbit_fifo:make_register_enqueuer(self()),
+ case ra:process_command(Server, Reg, Timeout) of
+ {ok, reject_publish, _} ->
+ {reject_publish, State0#state{queue_status = reject_publish}};
+ {ok, ok, _} ->
+ enqueue(Correlation, Msg, State0#state{queue_status = go});
+ {timeout, _} ->
+ %% if we timeout it is probably better to reject
+ %% the message than being uncertain
+ {reject_publish, State0};
+ Err ->
+ exit(Err)
+ end;
+ {badrpc, nodedown} ->
+ {reject_publish, State0}
+ end;
+enqueue(_Correlation, _Msg,
+ #state{queue_status = reject_publish,
+ cfg = #cfg{}} = State) ->
+ {reject_publish, State};
+enqueue(Correlation, Msg,
+ #state{slow = Slow,
+ queue_status = go,
+ cfg = #cfg{block_handler = BlockFun}} = State0) ->
+ Node = pick_server(State0),
+ {Next, State1} = next_enqueue_seq(State0),
+ % by default there is no correlation id
+ Cmd = rabbit_fifo:make_enqueue(self(), Next, Msg),
+ case send_command(Node, Correlation, Cmd, low, State1) of
+ {slow, State} when not Slow ->
+ BlockFun(),
+ {slow, set_timer(State)};
+ Any ->
+ Any
+ end.
+
+%% @doc Enqueues a message.
+%% @param Msg an arbitrary erlang term representing the message.
+%% @param State the current {@module} state.
+%% @returns
+%% `{ok | slow, State}' if the command was successfully sent. If the return
+%% tag is `slow' it means the limit is approaching and it is time to slow down
+%% the sending rate.
+%% {@module} assigns a sequence number to every raft command it issues. The
+%% SequenceNumber can be correlated to the applied sequence numbers returned
+%% by the {@link handle_ra_event/2. handle_ra_event/2} function.
+%%
+-spec enqueue(Msg :: term(), State :: state()) ->
+ {ok | slow | reject_publish, state()}.
+enqueue(Msg, State) ->
+ enqueue(undefined, Msg, State).
+
+%% @doc Dequeue a message from the queue.
+%%
+%% This is a synchronous call. I.e. the call will block until the command
+%% has been accepted by the ra process or it times out.
+%%
+%% @param ConsumerTag a unique tag to identify this particular consumer.
+%% @param Settlement either `settled' or `unsettled'. When `settled' no
+%% further settlement needs to be done.
+%% @param State The {@module} state.
+%%
+%% @returns `{ok, IdMsg, State}' or `{error | timeout, term()}'
+-spec dequeue(rabbit_fifo:consumer_tag(),
+ Settlement :: settled | unsettled, state()) ->
+ {ok, non_neg_integer(), term(), non_neg_integer()}
+ | {empty, state()} | {error | timeout, term()}.
+dequeue(ConsumerTag, Settlement,
+ #state{cfg = #cfg{timeout = Timeout,
+ cluster_name = QName}} = State0) ->
+ Node = pick_server(State0),
+ ConsumerId = consumer_id(ConsumerTag),
+ case ra:process_command(Node,
+ rabbit_fifo:make_checkout(ConsumerId,
+ {dequeue, Settlement},
+ #{}),
+ Timeout) of
+ {ok, {dequeue, empty}, Leader} ->
+ {empty, State0#state{leader = Leader}};
+ {ok, {dequeue, {MsgId, {MsgHeader, Msg0}}, MsgsReady}, Leader} ->
+ Count = case MsgHeader of
+ #{delivery_count := C} -> C;
+ _ -> 0
+ end,
+ IsDelivered = Count > 0,
+ Msg = add_delivery_count_header(Msg0, Count),
+ {ok, MsgsReady,
+ {QName, qref(Leader), MsgId, IsDelivered, Msg},
+ State0#state{leader = Leader}};
+ {ok, {error, _} = Err, _Leader} ->
+ Err;
+ Err ->
+ Err
+ end.
+
+add_delivery_count_header(#basic_message{} = Msg0, Count)
+ when is_integer(Count) ->
+ rabbit_basic:add_header(<<"x-delivery-count">>, long, Count, Msg0);
+add_delivery_count_header(Msg, _Count) ->
+ Msg.
+
+
+%% @doc Settle a message. Permanently removes message from the queue.
+%% @param ConsumerTag the tag uniquely identifying the consumer.
+%% @param MsgIds the message ids received with the {@link rabbit_fifo:delivery/0.}
+%% @param State the {@module} state
+%% @returns
+%% `{ok | slow, State}' if the command was successfully sent. If the return
+%% tag is `slow' it means the limit is approaching and it is time to slow down
+%% the sending rate.
+%%
+-spec settle(rabbit_fifo:consumer_tag(), [rabbit_fifo:msg_id()], state()) ->
+ {state(), list()}.
+settle(ConsumerTag, [_|_] = MsgIds, #state{slow = false} = State0) ->
+ Node = pick_server(State0),
+ Cmd = rabbit_fifo:make_settle(consumer_id(ConsumerTag), MsgIds),
+ case send_command(Node, undefined, Cmd, normal, State0) of
+ {_, S} ->
+ % turn slow into ok for this function
+ {S, []}
+ end;
+settle(ConsumerTag, [_|_] = MsgIds,
+ #state{unsent_commands = Unsent0} = State0) ->
+ ConsumerId = consumer_id(ConsumerTag),
+ %% we've reached the soft limit so will stash the command to be
+ %% sent once we have seen enough notifications
+ Unsent = maps:update_with(ConsumerId,
+ fun ({Settles, Returns, Discards}) ->
+ {Settles ++ MsgIds, Returns, Discards}
+ end, {MsgIds, [], []}, Unsent0),
+ {State0#state{unsent_commands = Unsent}, []}.
+
+%% @doc Return a message to the queue.
+%% @param ConsumerTag the tag uniquely identifying the consumer.
+%% @param MsgIds the message ids to return received
+%% from {@link rabbit_fifo:delivery/0.}
+%% @param State the {@module} state
+%% @returns
+%% `{ok | slow, State}' if the command was successfully sent. If the return
+%% tag is `slow' it means the limit is approaching and it is time to slow down
+%% the sending rate.
+%%
+-spec return(rabbit_fifo:consumer_tag(), [rabbit_fifo:msg_id()], state()) ->
+ {state(), list()}.
+return(ConsumerTag, [_|_] = MsgIds, #state{slow = false} = State0) ->
+ Node = pick_server(State0),
+ % TODO: make rabbit_fifo return support lists of message ids
+ Cmd = rabbit_fifo:make_return(consumer_id(ConsumerTag), MsgIds),
+ case send_command(Node, undefined, Cmd, normal, State0) of
+ {_, S} ->
+ {S, []}
+ end;
+return(ConsumerTag, [_|_] = MsgIds,
+ #state{unsent_commands = Unsent0} = State0) ->
+ ConsumerId = consumer_id(ConsumerTag),
+ %% we've reached the soft limit so will stash the command to be
+ %% sent once we have seen enough notifications
+ Unsent = maps:update_with(ConsumerId,
+ fun ({Settles, Returns, Discards}) ->
+ {Settles, Returns ++ MsgIds, Discards}
+ end, {[], MsgIds, []}, Unsent0),
+ {State0#state{unsent_commands = Unsent}, []}.
+
+%% @doc Discards a checked out message.
+%% If the queue has a dead_letter_handler configured this will be called.
+%% @param ConsumerTag the tag uniquely identifying the consumer.
+%% @param MsgIds the message ids to discard
+%% from {@link rabbit_fifo:delivery/0.}
+%% @param State the {@module} state
+%% @returns
+%% `{ok | slow, State}' if the command was successfully sent. If the return
+%% tag is `slow' it means the limit is approaching and it is time to slow down
+%% the sending rate.
+-spec discard(rabbit_fifo:consumer_tag(), [rabbit_fifo:msg_id()], state()) ->
+ {state(), list()}.
+discard(ConsumerTag, [_|_] = MsgIds, #state{slow = false} = State0) ->
+ Node = pick_server(State0),
+ Cmd = rabbit_fifo:make_discard(consumer_id(ConsumerTag), MsgIds),
+ case send_command(Node, undefined, Cmd, normal, State0) of
+ {_, S} ->
+ % turn slow into ok for this function
+ {S, []}
+ end;
+discard(ConsumerTag, [_|_] = MsgIds,
+ #state{unsent_commands = Unsent0} = State0) ->
+ ConsumerId = consumer_id(ConsumerTag),
+ %% we've reached the soft limit so will stash the command to be
+ %% sent once we have seen enough notifications
+ Unsent = maps:update_with(ConsumerId,
+ fun ({Settles, Returns, Discards}) ->
+ {Settles, Returns, Discards ++ MsgIds}
+ end, {[], [], MsgIds}, Unsent0),
+ {State0#state{unsent_commands = Unsent}, []}.
+
+%% @doc Register with the rabbit_fifo queue to "checkout" messages as they
+%% become available.
+%%
+%% This is a synchronous call. I.e. the call will block until the command
+%% has been accepted by the ra process or it times out.
+%%
+%% @param ConsumerTag a unique tag to identify this particular consumer.
+%% @param NumUnsettled the maximum number of in-flight messages. Once this
+%% number of messages has been received but not settled no further messages
+%% will be delivered to the consumer.
+%% @param CreditMode The credit mode to use for the checkout.
+%% simple_prefetch: credit is auto topped up as deliveries are settled
+%% credited: credit is only increased by sending credit to the queue
+%% @param State The {@module} state.
+%%
+%% @returns `{ok, State}' or `{error | timeout, term()}'
+-spec checkout(rabbit_fifo:consumer_tag(),
+ NumUnsettled :: non_neg_integer(),
+ CreditMode :: rabbit_fifo:credit_mode(),
+ Meta :: rabbit_fifo:consumer_meta(),
+ state()) -> {ok, state()} | {error | timeout, term()}.
+checkout(ConsumerTag, NumUnsettled, CreditMode, Meta,
+ #state{consumer_deliveries = CDels0} = State0) ->
+ Servers = sorted_servers(State0),
+ ConsumerId = {ConsumerTag, self()},
+ Cmd = rabbit_fifo:make_checkout(ConsumerId,
+ {auto, NumUnsettled, CreditMode},
+ Meta),
+ %% ???
+ Ack = maps:get(ack, Meta, true),
+
+ SDels = maps:update_with(ConsumerTag,
+ fun (V) ->
+ V#consumer{ack = Ack}
+ end,
+ #consumer{last_msg_id = -1,
+ ack = Ack}, CDels0),
+ try_process_command(Servers, Cmd, State0#state{consumer_deliveries = SDels}).
+
+%% @doc Provide credit to the queue
+%%
+%% This only has an effect if the consumer uses credit mode: credited
+%% @param ConsumerTag a unique tag to identify this particular consumer.
+%% @param Credit the amount of credit to provide to the queue
+%% @param Drain tells the queue to use up any credit that cannot be immediately
+%% fulfilled. (i.e. there are not enough messages on queue to use up all the
+%% provided credit).
+-spec credit(rabbit_fifo:consumer_tag(),
+ Credit :: non_neg_integer(),
+ Drain :: boolean(),
+ state()) ->
+ {state(), actions()}.
+credit(ConsumerTag, Credit, Drain,
+ #state{consumer_deliveries = CDels} = State0) ->
+ ConsumerId = consumer_id(ConsumerTag),
+ %% the last received msgid provides us with the delivery count if we
+ %% add one as it is 0 indexed
+ C = maps:get(ConsumerTag, CDels, #consumer{last_msg_id = -1}),
+ Node = pick_server(State0),
+ Cmd = rabbit_fifo:make_credit(ConsumerId, Credit,
+ C#consumer.last_msg_id + 1, Drain),
+ case send_command(Node, undefined, Cmd, normal, State0) of
+ {_, S} ->
+ % turn slow into ok for this function
+ {S, []}
+ end.
+
+%% @doc Cancels a checkout with the rabbit_fifo queue for the consumer tag
+%%
+%% This is a synchronous call, i.e. it blocks until the command has been
+%% accepted by the ra process or it times out.
+%%
+%% @param ConsumerTag a unique tag to identify this particular consumer.
+%% @param State The {@module} state.
+%%
+%% @returns `{ok, State}' or `{error | timeout, term()}'
+-spec cancel_checkout(rabbit_fifo:consumer_tag(), state()) ->
+    {ok, state()} | {error | timeout, term()}.
+cancel_checkout(ConsumerTag, #state{consumer_deliveries = CDels0} = State0) ->
+    %% drop the locally tracked delivery state for this consumer first
+    State = State0#state{consumer_deliveries = maps:remove(ConsumerTag, CDels0)},
+    CancelCmd = rabbit_fifo:make_checkout({ConsumerTag, self()}, cancel, #{}),
+    try_process_command(sorted_servers(State), CancelCmd, State).
+
+%% @doc Purges all the messages from a rabbit_fifo queue and returns the number
+%% of messages purged.
+-spec purge(ra:server_id()) -> {ok, non_neg_integer()} | {error | timeout, term()}.
+purge(Server) ->
+    PurgeCmd = rabbit_fifo:make_purge(),
+    case ra:process_command(Server, PurgeCmd) of
+        {ok, {purge, NumPurged}, _Leader} ->
+            {ok, NumPurged};
+        Other ->
+            Other
+    end.
+
+%% @doc The number of commands sent to the queue but not yet applied
+%% (the in-flight command count used for flow control).
+-spec pending_size(state()) -> non_neg_integer().
+pending_size(#state{pending = Pending}) ->
+    map_size(Pending).
+
+%% @doc Query the queue's two counters via rabbit_fifo:query_stat/1
+%% (presumably messages ready and messages checked out — confirm against
+%% rabbit_fifo) using a default 250 ms timeout.
+-spec stat(ra:server_id()) ->
+    {ok, non_neg_integer(), non_neg_integer()}
+    | {error | timeout, term()}.
+stat(Leader) ->
+    %% short timeout as we don't want to spend too long if it is going to
+    %% fail anyway
+    stat(Leader, 250).
+
+%% @doc Same as stat/1 but with an explicit timeout (milliseconds).
+-spec stat(ra:server_id(), non_neg_integer()) ->
+    {ok, non_neg_integer(), non_neg_integer()}
+    | {error | timeout, term()}.
+stat(Leader, Timeout) ->
+    %% short timeout as we don't want to spend too long if it is going to
+    %% fail anyway
+    case ra:local_query(Leader, fun rabbit_fifo:query_stat/1, Timeout) of
+        {ok, {_, {R, C}}, _} -> {ok, R, C};
+        {error, _} = Error -> Error;
+        {timeout, _} = Error -> Error
+    end.
+
+%% @doc returns the cluster name stored in the client configuration
+-spec cluster_name(state()) -> cluster_name().
+cluster_name(#state{cfg = #cfg{cluster_name = ClusterName}}) ->
+    ClusterName.
+
+%% Push a new machine configuration to the queue. Returns `ok' once the
+%% update_config command has been applied, otherwise the error from ra.
+update_machine_state(Server, Conf) ->
+    UpdateCmd = rabbit_fifo:make_update_config(Conf),
+    case ra:process_command(Server, UpdateCmd) of
+        {ok, ok, _Leader} -> ok;
+        Other -> Other
+    end.
+
+%% @doc Handles incoming `ra_events'. Events carry both internal "bookkeeping"
+%% events emitted by the `ra' leader as well as `rabbit_fifo' emitted events such
+%% as message deliveries. All ra events need to be handled by {@module}
+%% to ensure bookkeeping, resends and flow control is correctly handled.
+%%
+%% If the `ra_event' contains a `rabbit_fifo' generated message it will be returned
+%% for further processing.
+%%
+%% Example:
+%%
+%% ```
+%% receive
+%%     {ra_event, From, Evt} ->
+%%         case rabbit_fifo_client:handle_ra_event(From, Evt, State0) of
+%%             {internal, _Seq, State} -> State;
+%%             {{delivery, _ConsumerTag, Msgs}, State} ->
+%%                  handle_messages(Msgs),
+%%                  ...
+%%         end
+%% end
+%% '''
+%%
+%% @param From the {@link ra:server_id().} of the sending process.
+%% @param Event the body of the `ra_event'.
+%% @param State the current {@module} state.
+%%
+%% @returns
+%% `{internal, AppliedCorrelations, State}' if the event contained an internally
+%% handled event such as a notification and a correlation was included with
+%% the command (e.g. in a call to `enqueue/3' the correlation terms are returned
+%% here.
+%%
+%% `{RaFifoEvent, State}' if the event contained a client message generated by
+%% the `rabbit_fifo' state machine such as a delivery.
+%%
+%% The type of `rabbit_fifo' client messages that can be received are:
+%%
+%% `{delivery, ConsumerTag, [{MsgId, {MsgHeader, Msg}}]}'
+%%
+%% <li>`ConsumerTag' the binary tag passed to {@link checkout/3.}</li>
+%% <li>`MsgId' is a consumer scoped monotonically incrementing id that can be
+%% used to {@link settle/3.} (roughly: AMQP 0.9.1 ack) message once finished
+%% with them.</li>
+-spec handle_ra_event(ra:server_id(), ra_server_proc:ra_event_body(), state()) ->
+    {internal, Correlators :: [term()], actions(), state()} |
+    {rabbit_fifo:client_msg(), state()} | eol.
+handle_ra_event(From, {applied, Seqs},
+                #state{cfg = #cfg{cluster_name = QRef,
+                                  soft_limit = SftLmt,
+                                  unblock_handler = UnblockFun}} = State0) ->
+
+    {Corrs, Actions0, State1} = lists:foldl(fun seq_applied/2,
+                                            {[], [], State0#state{leader = From}},
+                                            Seqs),
+    %% when any applied commands carried correlation terms, emit them as a
+    %% single `settled' action ahead of the other accumulated actions
+    Actions = case Corrs of
+                  [] ->
+                      lists:reverse(Actions0);
+                  _ ->
+                      [{settled, QRef, Corrs}
+                       | lists:reverse(Actions0)]
+              end,
+    case maps:size(State1#state.pending) < SftLmt of
+        true when State1#state.slow == true ->
+            % we have exited soft limit state
+            % send any unsent commands and cancel the time as
+            % TODO: really the timer should only be cancelled when the channel
+            % exits flow state (which depends on the state of all queues the
+            % channel is interacting with)
+            % but the fact the queue has just applied suggests
+            % it's ok to cancel here anyway
+            State2 = cancel_timer(State1#state{slow = false,
+                                               unsent_commands = #{}}),
+            % build up a list of commands to issue
+            Commands = maps:fold(
+                         fun (Cid, {Settled, Returns, Discards}, Acc) ->
+                                 add_command(Cid, settle, Settled,
+                                             add_command(Cid, return, Returns,
+                                                         add_command(Cid, discard,
+                                                                     Discards, Acc)))
+                         end, [], State1#state.unsent_commands),
+            Node = pick_server(State2),
+            %% send all the settlements and returns
+            State = lists:foldl(fun (C, S0) ->
+                                        case send_command(Node, undefined,
+                                                          C, normal, S0) of
+                                            {T, S} when T =/= error ->
+                                                S
+                                        end
+                                end, State2, Commands),
+            UnblockFun(),
+            {ok, State, Actions};
+        _ ->
+            {ok, State1, Actions}
+    end;
+handle_ra_event(From, {machine, {delivery, _ConsumerTag, _} = Del}, State0) ->
+    handle_delivery(From, Del, State0);
+handle_ra_event(_, {machine, {queue_status, Status}},
+                #state{} = State) ->
+    %% just set the queue status
+    {ok, State#state{queue_status = Status}, []};
+handle_ra_event(Leader, {machine, leader_change},
+                #state{leader = Leader} = State) ->
+    %% leader already known
+    {ok, State, []};
+handle_ra_event(Leader, {machine, leader_change}, State0) ->
+    %% we need to update leader
+    %% and resend any pending commands
+    State = resend_all_pending(State0#state{leader = Leader}),
+    {ok, State, []};
+handle_ra_event(_From, {rejected, {not_leader, undefined, _Seq}}, State0) ->
+    % TODO: how should these be handled? re-sent on timer or try random
+    {ok, State0, []};
+handle_ra_event(_From, {rejected, {not_leader, Leader, Seq}}, State0) ->
+    %% a follower rejected the command but knows the leader: adopt that
+    %% leader and resend the rejected command
+    State1 = State0#state{leader = Leader},
+    State = resend(Seq, State1),
+    {ok, State, []};
+handle_ra_event(_, timeout, #state{cfg = #cfg{servers = Servers}} = State0) ->
+    case find_leader(Servers) of
+        undefined ->
+            %% still no leader, set the timer again
+            {ok, set_timer(State0), []};
+        Leader ->
+            State = resend_all_pending(State0#state{leader = Leader}),
+            {ok, State, []}
+    end;
+handle_ra_event(_Leader, {machine, eol}, _State0) ->
+    %% the machine has reached end-of-life (e.g. queue deleted)
+    eol.
+
+%% @doc Attempts to enqueue a message using cast semantics. This provides no
+%% guarantees or retries if the message fails to achieve consensus or if the
+%% servers sent to happens not to be available. If the message is sent to a
+%% follower it will attempt to deliver it to the leader, if known. Else it will
+%% drop the messages.
+%%
+%% NB: only use this for non-critical enqueues where a full rabbit_fifo_client state
+%% cannot be maintained.
+%%
+%% @param Servers the known servers in the cluster (only the first is used).
+%% @param Msg the message to enqueue.
+%%
+%% @returns `ok'
+-spec untracked_enqueue([ra:server_id()], term()) ->
+    ok.
+untracked_enqueue([Node | _], Msg) ->
+    %% an enqueue with undefined pid and seq carries no sender tracking;
+    %% it is pipelined fire-and-forget with no flow control or correlation
+    Cmd = rabbit_fifo:make_enqueue(undefined, undefined, Msg),
+    ok = ra:pipeline_command(Node, Cmd),
+    ok.
+
+%% Internal
+
+%% Synchronously process Cmd against each server in order, stopping at the
+%% first success; the discovered leader is then cached in the state. When
+%% every server has been tried the last error is surfaced to the caller.
+try_process_command([Server | Rem], Cmd, State) ->
+    case ra:process_command(Server, Cmd, 30000) of
+        {ok, _, Leader} ->
+            {ok, State#state{leader = Leader}};
+        Err when Rem =:= [] ->
+            %% no more servers to try: return the error.
+            %% (`Rem =:= []' replaces the previous `length(Rem) =:= 0' guard:
+            %% same truth value, but O(1) instead of O(n))
+            Err;
+        _ ->
+            try_process_command(Rem, Cmd, State)
+    end.
+
+%% Fold function over the `{Seq, MaybeAction}' pairs of an `applied' event.
+%% A gap between the last applied sequence number and this one means some
+%% commands were skipped, so those are resent before processing this one.
+seq_applied({Seq, MaybeAction},
+            {Corrs, Actions0, #state{last_applied = Last} = State0})
+  when Seq > Last ->
+    State1 = do_resends(Last+1, Seq-1, State0),
+    {Actions, State} = maybe_add_action(MaybeAction, Actions0, State1),
+    case maps:take(Seq, State#state.pending) of
+        {{undefined, _}, Pending} ->
+            %% the applied command carried no correlation term
+            {Corrs, Actions, State#state{pending = Pending,
+                                         last_applied = Seq}};
+        {{Corr, _}, Pending} ->
+            {[Corr | Corrs], Actions, State#state{pending = Pending,
+                                                  last_applied = Seq}};
+        error ->
+            % must have already been resent or removed for some other reason
+            % still need to update last_applied or we may inadvertently resend
+            % stuff later
+            {Corrs, Actions, State#state{last_applied = Seq}}
+    end;
+seq_applied(_Seq, Acc) ->
+    %% stale (already applied) sequence numbers are ignored
+    Acc.
+
+%% Accumulate the client-facing action (if any) attached to an applied
+%% command. `ok' carries no action; `{multi, _}' is flattened recursively;
+%% `send_drained' additionally folds the drained credit into the consumer's
+%% delivery-count bookkeeping before being passed on.
+maybe_add_action(ok, Acc, State) ->
+    {Acc, State};
+maybe_add_action({multi, Actions}, Acc0, State0) ->
+    lists:foldl(fun (Act, {Acc, State}) ->
+                        maybe_add_action(Act, Acc, State)
+                end, {Acc0, State0}, Actions);
+maybe_add_action({send_drained, {Tag, Credit}} = Action, Acc,
+                 #state{consumer_deliveries = CDels} = State) ->
+    %% add credit to consumer delivery_count
+    C = maps:get(Tag, CDels),
+    {[Action | Acc],
+     State#state{consumer_deliveries =
+                 update_consumer(Tag, C#consumer.last_msg_id,
+                                 Credit, C, CDels)}};
+maybe_add_action(Action, Acc, State) ->
+    %% anything else is assumed to be an action
+    {[Action | Acc], State}.
+
+%% Resend every pending command whose sequence number lies in [From, To];
+%% an empty range leaves the state untouched.
+do_resends(From, To, State) when From > To ->
+    State;
+do_resends(From, To, State) ->
+    lists:foldl(fun resend/2, State, lists:seq(From, To)).
+
+% resends a command with a new sequence number
+resend(OldSeq, #state{pending = Pending0, leader = Leader} = State) ->
+    case maps:take(OldSeq, Pending0) of
+        {{Corr, Cmd}, Pending} ->
+            %% resends aren't subject to flow control here
+            resend_command(Leader, Corr, Cmd, State#state{pending = Pending});
+        error ->
+            %% nothing pending under that sequence number; nothing to do
+            State
+    end.
+
+%% Resend every pending (not yet applied) command, in sequence order.
+resend_all_pending(#state{pending = Pending} = State) ->
+    lists:foldl(fun resend/2, State, lists:sort(maps:keys(Pending))).
+
+%% When the consumer acks manually (Ack =:= true) the deliver action is
+%% passed through untouched; otherwise the delivered msg ids are settled
+%% immediately on the consumer's behalf and any resulting actions appended.
+maybe_auto_ack(true, Deliver, State0) ->
+    %% manual ack is enabled
+    {ok, State0, [Deliver]};
+maybe_auto_ack(false, {deliver, Tag, _Ack, Msgs} = Deliver, State0) ->
+    %% we have to auto ack these deliveries
+    MsgIds = [I || {_, _, I, _, _} <- Msgs],
+    {State, Actions} = settle(Tag, MsgIds, State0),
+    {ok, State, [Deliver] ++ Actions}.
+
+
+%% Process a machine `delivery' event. The first delivered msg id is compared
+%% with the last id seen for this consumer to detect gaps (filled back in via
+%% get_missing_deliveries/4) and duplicates (dropped), then a `deliver'
+%% action is emitted, auto-settling when the consumer does not ack manually.
+handle_delivery(Leader, {delivery, Tag, [{FstId, _} | _] = IdMsgs},
+                #state{cfg = #cfg{cluster_name = QName},
+                       consumer_deliveries = CDels0} = State0) ->
+    QRef = qref(Leader),
+    {LastId, _} = lists:last(IdMsgs),
+    Consumer = #consumer{ack = Ack} = maps:get(Tag, CDels0),
+    %% format as a deliver action
+    Del = {deliver, Tag, Ack, transform_msgs(QName, QRef, IdMsgs)},
+    %% TODO: remove potential default allocation
+    case Consumer of
+        #consumer{last_msg_id = Prev} = C
+          when FstId =:= Prev+1 ->
+            %% the expected case: this delivery follows on directly
+            maybe_auto_ack(Ack, Del,
+                           State0#state{consumer_deliveries =
+                                        update_consumer(Tag, LastId,
+                                                        length(IdMsgs), C,
+                                                        CDels0)});
+        #consumer{last_msg_id = Prev} = C
+          when FstId > Prev+1 ->
+            %% NOTE(review): the gap Prev+1..FstId-1 contains FstId - Prev - 1
+            %% messages; confirm whether counting two extra here is intended
+            NumMissing = FstId - Prev + 1,
+            %% there may actually be fewer missing messages returned than expected
+            %% This can happen when a node the channel is on gets disconnected
+            %% from the node the leader is on and then reconnected afterwards.
+            %% When the node is disconnected the leader will return all checked
+            %% out messages to the main queue to ensure they don't get stuck in
+            %% case the node never comes back.
+            case get_missing_deliveries(Leader, Prev+1, FstId-1, Tag) of
+                {protocol_error, _, _, _} = Err ->
+                    Err;
+                Missing ->
+                    XDel = {deliver, Tag, Ack, transform_msgs(QName, QRef,
+                                                              Missing ++ IdMsgs)},
+                    maybe_auto_ack(Ack, XDel,
+                                   State0#state{consumer_deliveries =
+                                                update_consumer(Tag, LastId,
+                                                                length(IdMsgs) + NumMissing,
+                                                                C, CDels0)})
+            end;
+        #consumer{last_msg_id = Prev}
+          when FstId =< Prev ->
+            %% duplicate delivery (e.g. after a resend): drop any ids already
+            %% seen and re-handle whatever remains
+            case lists:dropwhile(fun({Id, _}) -> Id =< Prev end, IdMsgs) of
+                [] ->
+                    {ok, State0, []};
+                IdMsgs2 ->
+                    handle_delivery(Leader, {delivery, Tag, IdMsgs2}, State0)
+            end;
+        C when FstId =:= 0 ->
+            % the very first delivery
+            %% (reached when last_msg_id is not an integer, so the arithmetic
+            %% guards above fail — presumably an uninitialised consumer;
+            %% TODO confirm)
+            maybe_auto_ack(Ack, Del,
+                           State0#state{consumer_deliveries =
+                                        update_consumer(Tag, LastId,
+                                                        length(IdMsgs),
+                                                        C#consumer{last_msg_id = LastId},
+                                                        CDels0)})
+    end.
+
+%% Turn raw `{MsgId, {MsgHeader, Msg}}' pairs into the 5-tuples used in
+%% deliver actions, flagging messages with a delivery_count header as
+%% redelivered (and embedding the count into the message).
+transform_msgs(QName, QRef, Msgs) ->
+    [begin
+         {Msg, Redelivered} =
+             case Header of
+                 #{delivery_count := Count} ->
+                     {add_delivery_count_header(Msg0, Count), true};
+                 _ ->
+                     {Msg0, false}
+             end,
+         {QName, QRef, MsgId, Redelivered, Msg}
+     end || {MsgId, {Header, Msg0}} <- Msgs].
+
+%% Record the consumer's latest seen msg id and bump its delivery count by
+%% DelCntIncr, returning the updated consumers map.
+update_consumer(Tag, LastId, DelCntIncr,
+                #consumer{delivery_count = Count0} = Consumer, Consumers) ->
+    Consumers#{Tag => Consumer#consumer{last_msg_id = LastId,
+                                        delivery_count = Count0 + DelCntIncr}}.
+
+
+%% Query the leader for the checked-out messages with ids From..To for the
+%% given consumer; used to fill gaps detected in handle_delivery/3.
+%% Returns the messages or a `protocol_error' tuple when the query fails.
+get_missing_deliveries(Leader, From, To, ConsumerTag) ->
+    ConsumerId = consumer_id(ConsumerTag),
+    Query = fun (State) ->
+                    rabbit_fifo:get_checked_out(ConsumerId, From, To, State)
+            end,
+    case ra:local_query(Leader, Query) of
+        {ok, {_, Missing}, _} ->
+            Missing;
+        {error, Error} ->
+            {protocol_error, internal_error, "Cannot query missing deliveries from ~p: ~p",
+             [Leader, Error]};
+        {timeout, _} ->
+            {protocol_error, internal_error, "Cannot query missing deliveries from ~p: timeout",
+             [Leader]}
+    end.
+
+%% Choose the server to send commands to: the last known leader, or the
+%% first configured server when no leader is known yet.
+pick_server(#state{leader = undefined,
+                   cfg = #cfg{servers = [N | _]}}) ->
+    %% TODO: pick random rather than first?
+    N;
+pick_server(#state{leader = Leader}) ->
+    Leader.
+
+% servers sorted by last known leader
+sorted_servers(#state{leader = undefined,
+                      cfg = #cfg{servers = Servers}}) ->
+    Servers;
+sorted_servers(#state{leader = Leader,
+                      cfg = #cfg{servers = Servers}}) ->
+    %% try the known leader first, then the remaining configured servers
+    [Leader | lists:delete(Leader, Servers)].
+
+%% Allocate the next raft pipeline sequence number.
+next_seq(#state{next_seq = Seq} = State) ->
+    {Seq, State#state{next_seq = Seq + 1}}.
+
+%% Allocate the next enqueue sequence number (used for deduplication).
+next_enqueue_seq(#state{next_enqueue_seq = Seq} = State) ->
+    {Seq, State#state{next_enqueue_seq = Seq + 1}}.
+
+%% A consumer is identified by its tag together with the owning channel pid.
+consumer_id(Tag) ->
+    {Tag, self()}.
+
+%% Pipeline a command to the given server, tagging it with a fresh sequence
+%% number so the `applied' notification can be correlated later. Returns
+%% `slow' when the number of pending (unapplied) commands has reached the
+%% soft limit — used by callers for flow control — otherwise `ok'.
+send_command(Server, Correlation, Command, Priority,
+             #state{pending = Pending,
+                    cfg = #cfg{soft_limit = SftLmt}} = State0) ->
+    {Seq, State} = next_seq(State0),
+    ok = ra:pipeline_command(Server, Command, Seq, Priority),
+    Tag = case maps:size(Pending) >= SftLmt of
+              true -> slow;
+              false -> ok
+          end,
+    {Tag, State#state{pending = Pending#{Seq => {Correlation, Command}},
+                      slow = Tag == slow}}.
+
+%% Re-pipeline a command under a fresh sequence number (used after
+%% rejections and leader changes); resends bypass flow control.
+resend_command(Node, Correlation, Command,
+               #state{pending = Pending} = State0) ->
+    {Seq, State} = next_seq(State0),
+    ok = ra:pipeline_command(Node, Command, Seq),
+    State#state{pending = Pending#{Seq => {Correlation, Command}}}.
+
+%% Prepend the appropriate rabbit_fifo command for a batch of message ids;
+%% an empty batch contributes no command.
+add_command(_Cid, _Kind, [], Commands) ->
+    Commands;
+add_command(Cid, settle, MsgIds, Commands) ->
+    [rabbit_fifo:make_settle(Cid, MsgIds) | Commands];
+add_command(Cid, return, MsgIds, Commands) ->
+    [rabbit_fifo:make_return(Cid, MsgIds) | Commands];
+add_command(Cid, discard, MsgIds, Commands) ->
+    [rabbit_fifo:make_discard(Cid, MsgIds) | Commands].
+
+%% Start the leader-probe timer: after ?TIMER_TIME a `{Leader, timeout}'
+%% queue_event is cast to the owning process (which is expected to route it
+%% back into handle_ra_event/3 as `timeout'). Targets the last known leader,
+%% or the first configured server when no leader is known.
+set_timer(#state{leader = Leader0,
+                 cfg = #cfg{servers = [Server | _],
+                            cluster_name = QName}} = State) ->
+    Leader = case Leader0 of
+                 undefined -> Server;
+                 _ ->
+                     Leader0
+             end,
+    Ref = erlang:send_after(?TIMER_TIME, self(),
+                            {'$gen_cast',
+                             {queue_event, QName, {Leader, timeout}}}),
+    State#state{timer_state = Ref}.
+
+%% Cancel the leader-probe timer if one is running. The cancellation is
+%% asynchronous and no cancellation result message is delivered.
+cancel_timer(#state{timer_state = TRef} = State) ->
+    case TRef of
+        undefined ->
+            State;
+        _ ->
+            erlang:cancel_timer(TRef, [{async, true}, {info, false}]),
+            State#state{timer_state = undefined}
+    end.
+
+%% Walk the server list querying cluster membership (500 ms per node) until
+%% one reports a leader; `undefined' when none of them does.
+find_leader([]) ->
+    undefined;
+find_leader([Server | Rest]) ->
+    case ra:members(Server, 500) of
+        {ok, _Members, Leader} ->
+            Leader;
+        _ ->
+            find_leader(Rest)
+    end.
+
+%% Extract the queue reference from a `{QRef, Node}' server id; a bare
+%% reference passes straight through.
+qref({QRef, _Node}) ->
+    QRef;
+qref(QRef) ->
+    QRef.
diff --git a/deps/rabbit/src/rabbit_fifo_index.erl b/deps/rabbit/src/rabbit_fifo_index.erl
new file mode 100644
index 0000000000..14ac89faff
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fifo_index.erl
@@ -0,0 +1,119 @@
+-module(rabbit_fifo_index).
+
+%% A minimal ordered set of integer keys backed by a map that also tracks
+%% the smallest and largest key. Used by rabbit_fifo to hold raft indexes;
+%% the data map is persisted as part of snapshots, hence the focus on a
+%% compact representation.
+
+-export([
+         empty/0,
+         exists/2,
+         append/2,
+         delete/2,
+         size/1,
+         smallest/1,
+         map/2
+        ]).
+
+%% this module defines its own size/1, so the auto-imported BIF must be
+%% kept out of scope
+-compile({no_auto_import, [size/1]}).
+
+%% the empty atom is a lot smaller (4 bytes) than e.g. `undefined` (13 bytes).
+%% This matters as the data map gets persisted as part of the snapshot
+-define(NIL, '').
+
+-record(?MODULE, {data = #{} :: #{integer() => ?NIL},    % keys only; values are ?NIL
+                  smallest :: undefined | non_neg_integer(),
+                  largest :: undefined | non_neg_integer()
+                 }).
+
+
+-opaque state() :: #?MODULE{}.
+
+-export_type([state/0]).
+
+%% @doc A new, empty index.
+-spec empty() -> state().
+empty() ->
+    #?MODULE{}.
+
+%% @doc Whether Key is present in the index.
+-spec exists(integer(), state()) -> boolean().
+exists(Key, #?MODULE{data = Data}) ->
+    is_map_key(Key, Data).
+
+% only integer keys are supported
+%% Append a key strictly larger than any existing key. The guard relies on
+%% Erlang term ordering: when Largest is `undefined' (an atom), `Key > Largest'
+%% is false for any integer, so the `orelse' branch admits the very first key.
+%% Out-of-order appends fail with a function_clause error.
+-spec append(integer(), state()) -> state().
+append(Key,
+       #?MODULE{data = Data,
+                smallest = Smallest,
+                largest = Largest} = State)
+  when Key > Largest orelse Largest =:= undefined ->
+    State#?MODULE{data = maps:put(Key, ?NIL, Data),
+                  %% the first key appended also becomes the smallest
+                  smallest = ra_lib:default(Smallest, Key),
+                  largest = Key}.
+
+-spec delete(Index :: integer(), state()) -> state().
+%% Deleting the current smallest key requires scanning forward for the next
+%% remaining key to keep the `smallest' field correct; deleting any other key
+%% is a plain map removal.
+delete(Smallest, #?MODULE{data = Data0,
+                          largest = Largest,
+                          smallest = Smallest} = State) ->
+    Data = maps:remove(Smallest, Data0),
+    case find_next(Smallest + 1, Largest, Data) of
+        undefined ->
+            %% no keys remain: the index is empty again
+            State#?MODULE{data = Data,
+                          smallest = undefined,
+                          largest = undefined};
+        Next ->
+            State#?MODULE{data = Data, smallest = Next}
+    end;
+delete(Key, #?MODULE{data = Data} = State) ->
+    State#?MODULE{data = maps:remove(Key, Data)}.
+
+%% @doc The number of keys held by the index.
+-spec size(state()) -> non_neg_integer().
+size(#?MODULE{data = Data}) ->
+    map_size(Data).
+
+%% @doc The smallest key currently held, or `undefined' when empty.
+-spec smallest(state()) -> undefined | integer().
+smallest(#?MODULE{smallest = Sm}) ->
+    Sm.
+
+
+%% @doc Apply F (a maps:map/2-style fun of key and value) over the data map.
+-spec map(fun(), state()) -> state().
+map(F, #?MODULE{data = Data} = State) ->
+    State#?MODULE{data = maps:map(F, Data)}.
+
+
+%% internal
+
+%% Scan upwards from Next to Last (inclusive) for the first key present in
+%% Map; `undefined' when the range is exhausted.
+%% In degenerate cases the range could be very large and hence this could be
+%% very slow; the typical case should be better assuming fifo-ish deletion
+%% of entries.
+find_next(Next, Last, _Map) when Next > Last ->
+    undefined;
+find_next(Next, Last, Map) ->
+    case maps:is_key(Next, Map) of
+        true ->
+            Next;
+        false ->
+            find_next(Next + 1, Last, Map)
+    end.
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+%% Exercise append/delete/smallest/size through a full fill-then-drain cycle.
+append_test() ->
+    S0 = empty(),
+    false = exists(99, S0),
+    undefined = smallest(S0),
+    0 = size(S0),
+    S1 = append(1, S0),
+    false = exists(99, S1),
+    true = exists(1, S1),
+    1 = size(S1),
+    1 = smallest(S1),
+    S2 = append(2, S1),
+    true = exists(2, S2),
+    2 = size(S2),
+    1 = smallest(S2),
+    S3 = delete(1, S2),
+    2 = smallest(S3),
+    1 = size(S3),
+    S5 = delete(2, S3),
+    undefined = smallest(S5),
+    %% deleting the last entry must leave the index empty again
+    %% (fix: previously asserted size(S0), which is trivially 0 and did not
+    %% check the final state)
+    0 = size(S5),
+    ok.
+
+-endif.
diff --git a/deps/rabbit/src/rabbit_fifo_v0.erl b/deps/rabbit/src/rabbit_fifo_v0.erl
new file mode 100644
index 0000000000..a61f42616d
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fifo_v0.erl
@@ -0,0 +1,1961 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at https://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is GoPivotal, Inc.
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_fifo_v0).
+
+-behaviour(ra_machine).
+
+-compile(inline_list_funcs).
+-compile(inline).
+-compile({no_auto_import, [apply/3]}).
+
+-include("rabbit_fifo_v0.hrl").
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([
+ init/1,
+ apply/3,
+ state_enter/2,
+ tick/2,
+ overview/1,
+ get_checked_out/4,
+ %% aux
+ init_aux/1,
+ handle_aux/6,
+ % queries
+ query_messages_ready/1,
+ query_messages_checked_out/1,
+ query_messages_total/1,
+ query_processes/1,
+ query_ra_indexes/1,
+ query_consumer_count/1,
+ query_consumers/1,
+ query_stat/1,
+ query_single_active_consumer/1,
+ query_in_memory_usage/1,
+ usage/1,
+
+ zero/1,
+
+ %% misc
+ dehydrate_state/1,
+ normalize/1,
+ normalize_for_v1/1,
+         %% getters for conversions
+ get_field/2,
+ get_cfg_field/2,
+
+ %% protocol helpers
+ make_enqueue/3,
+ make_checkout/3,
+ make_settle/2,
+ make_return/2,
+ make_discard/2,
+ make_credit/4,
+ make_purge/0,
+ make_purge_nodes/1,
+ make_update_config/1
+ ]).
+
+%% command records representing all the protocol actions that are supported
+-record(enqueue, {pid :: option(pid()),
+ seq :: option(msg_seqno()),
+ msg :: raw_msg()}).
+-record(checkout, {consumer_id :: consumer_id(),
+ spec :: checkout_spec(),
+ meta :: consumer_meta()}).
+-record(settle, {consumer_id :: consumer_id(),
+ msg_ids :: [msg_id()]}).
+-record(return, {consumer_id :: consumer_id(),
+ msg_ids :: [msg_id()]}).
+-record(discard, {consumer_id :: consumer_id(),
+ msg_ids :: [msg_id()]}).
+-record(credit, {consumer_id :: consumer_id(),
+ credit :: non_neg_integer(),
+ delivery_count :: non_neg_integer(),
+ drain :: boolean()}).
+-record(purge, {}).
+-record(purge_nodes, {nodes :: [node()]}).
+-record(update_config, {config :: config()}).
+
+-opaque protocol() ::
+ #enqueue{} |
+ #checkout{} |
+ #settle{} |
+ #return{} |
+ #discard{} |
+ #credit{} |
+ #purge{} |
+ #purge_nodes{} |
+ #update_config{}.
+
+-type command() :: protocol() | ra_machine:builtin_command().
+%% all the command types supported by ra fifo
+
+-type client_msg() :: delivery().
+%% the messages `rabbit_fifo' can send to consumers.
+
+-opaque state() :: #?STATE{}.
+
+-export_type([protocol/0,
+ delivery/0,
+ command/0,
+ credit_mode/0,
+ consumer_tag/0,
+ consumer_meta/0,
+ consumer_id/0,
+ client_msg/0,
+ msg/0,
+ msg_id/0,
+ msg_seqno/0,
+ delivery_msg/0,
+ state/0,
+ config/0]).
+
+%% ra_machine callback: initialise the machine state from the queue config;
+%% `name' and `queue_resource' are required keys, everything else is applied
+%% via update_config/2.
+-spec init(config()) -> state().
+init(#{name := Name,
+       queue_resource := Resource} = Conf) ->
+    update_config(Conf, #?STATE{cfg = #cfg{name = Name,
+                                           resource = Resource}}).
+
+%% Merge a (possibly partial) config map into the machine state's cfg record,
+%% applying defaults for absent keys.
+update_config(Conf, State) ->
+    DLH = maps:get(dead_letter_handler, Conf, undefined),
+    BLH = maps:get(become_leader_handler, Conf, undefined),
+    SHI = maps:get(release_cursor_interval, Conf, ?RELEASE_CURSOR_EVERY),
+    MaxLength = maps:get(max_length, Conf, undefined),
+    MaxBytes = maps:get(max_bytes, Conf, undefined),
+    MaxMemoryLength = maps:get(max_in_memory_length, Conf, undefined),
+    MaxMemoryBytes = maps:get(max_in_memory_bytes, Conf, undefined),
+    DeliveryLimit = maps:get(delivery_limit, Conf, undefined),
+    ConsumerStrategy = case maps:get(single_active_consumer_on, Conf, false) of
+                           true ->
+                               single_active;
+                           false ->
+                               competing
+                       end,
+    Cfg = State#?STATE.cfg,
+    %% the release cursor interval is stored as {ConfiguredInterval, Current};
+    %% keep the current effective value when one already exists
+    SHICur = case State#?STATE.cfg of
+                 #cfg{release_cursor_interval = {_, C}} ->
+                     C;
+                 #cfg{release_cursor_interval = undefined} ->
+                     SHI;
+                 #cfg{release_cursor_interval = C} ->
+                     C
+             end,
+
+    State#?STATE{cfg = Cfg#cfg{release_cursor_interval = {SHI, SHICur},
+                               dead_letter_handler = DLH,
+                               become_leader_handler = BLH,
+                               max_length = MaxLength,
+                               max_bytes = MaxBytes,
+                               max_in_memory_length = MaxMemoryLength,
+                               max_in_memory_bytes = MaxMemoryBytes,
+                               consumer_strategy = ConsumerStrategy,
+                               delivery_limit = DeliveryLimit}}.
+
+%% Constant function: returns 0 regardless of its argument (exported,
+%% presumably used as a trivial query — confirm at call sites).
+zero(_State) ->
+    0.
+
+% msg_ids are scoped per consumer
+% ra_indexes holds all raft indexes for enqueues currently on queue
+-spec apply(ra_machine:command_meta_data(), command(), state()) ->
+ {state(), Reply :: term(), ra_machine:effects()} |
+ {state(), Reply :: term()}.
+apply(Metadata, #enqueue{pid = From, seq = Seq,
+ msg = RawMsg}, State00) ->
+ apply_enqueue(Metadata, From, Seq, RawMsg, State00);
+apply(Meta,
+ #settle{msg_ids = MsgIds, consumer_id = ConsumerId},
+ #?STATE{consumers = Cons0} = State) ->
+ case Cons0 of
+ #{ConsumerId := Con0} ->
+ % need to increment metrics before completing as any snapshot
+ % states taken need to include them
+ complete_and_checkout(Meta, MsgIds, ConsumerId,
+ Con0, [], State);
+ _ ->
+ {State, ok}
+
+ end;
+apply(Meta, #discard{msg_ids = MsgIds, consumer_id = ConsumerId},
+ #?STATE{consumers = Cons0} = State0) ->
+ case Cons0 of
+ #{ConsumerId := Con0} ->
+ Discarded = maps:with(MsgIds, Con0#consumer.checked_out),
+ Effects = dead_letter_effects(rejected, Discarded, State0, []),
+ complete_and_checkout(Meta, MsgIds, ConsumerId, Con0,
+ Effects, State0);
+ _ ->
+ {State0, ok}
+ end;
+apply(Meta, #return{msg_ids = MsgIds, consumer_id = ConsumerId},
+ #?STATE{consumers = Cons0} = State) ->
+ case Cons0 of
+ #{ConsumerId := #consumer{checked_out = Checked0}} ->
+ Returned = maps:with(MsgIds, Checked0),
+ return(Meta, ConsumerId, Returned, [], State);
+ _ ->
+ {State, ok}
+ end;
+apply(Meta, #credit{credit = NewCredit, delivery_count = RemoteDelCnt,
+ drain = Drain, consumer_id = ConsumerId},
+ #?STATE{consumers = Cons0,
+ service_queue = ServiceQueue0,
+ waiting_consumers = Waiting0} = State0) ->
+ case Cons0 of
+ #{ConsumerId := #consumer{delivery_count = DelCnt} = Con0} ->
+ %% this can go below 0 when credit is reduced
+ C = max(0, RemoteDelCnt + NewCredit - DelCnt),
+ %% grant the credit
+ Con1 = Con0#consumer{credit = C},
+ ServiceQueue = maybe_queue_consumer(ConsumerId, Con1,
+ ServiceQueue0),
+ Cons = maps:put(ConsumerId, Con1, Cons0),
+ {State1, ok, Effects} =
+ checkout(Meta, State0#?STATE{service_queue = ServiceQueue,
+ consumers = Cons}, []),
+ Response = {send_credit_reply, messages_ready(State1)},
+ %% by this point all checkouts for the updated credit value
+ %% should be processed so we can evaluate the drain
+ case Drain of
+ false ->
+ %% just return the result of the checkout
+ {State1, Response, Effects};
+ true ->
+ Con = #consumer{credit = PostCred} =
+ maps:get(ConsumerId, State1#?STATE.consumers),
+ %% add the outstanding credit to the delivery count
+ DeliveryCount = Con#consumer.delivery_count + PostCred,
+ Consumers = maps:put(ConsumerId,
+ Con#consumer{delivery_count = DeliveryCount,
+ credit = 0},
+ State1#?STATE.consumers),
+ Drained = Con#consumer.credit,
+ {CTag, _} = ConsumerId,
+ {State1#?STATE{consumers = Consumers},
+ %% returning a multi response with two client actions
+ %% for the channel to execute
+ {multi, [Response, {send_drained, {CTag, Drained}}]},
+ Effects}
+ end;
+ _ when Waiting0 /= [] ->
+            %% there are waiting consumers
+ case lists:keytake(ConsumerId, 1, Waiting0) of
+ {value, {_, Con0 = #consumer{delivery_count = DelCnt}}, Waiting} ->
+ %% the consumer is a waiting one
+ %% grant the credit
+ C = max(0, RemoteDelCnt + NewCredit - DelCnt),
+ Con = Con0#consumer{credit = C},
+ State = State0#?STATE{waiting_consumers =
+ [{ConsumerId, Con} | Waiting]},
+ {State, {send_credit_reply, messages_ready(State)}};
+ false ->
+ {State0, ok}
+ end;
+ _ ->
+ %% credit for unknown consumer - just ignore
+ {State0, ok}
+ end;
+apply(_, #checkout{spec = {dequeue, _}},
+ #?STATE{cfg = #cfg{consumer_strategy = single_active}} = State0) ->
+ {State0, {error, unsupported}};
+apply(#{from := From} = Meta, #checkout{spec = {dequeue, Settlement},
+ meta = ConsumerMeta,
+ consumer_id = ConsumerId},
+ #?STATE{consumers = Consumers} = State0) ->
+ Exists = maps:is_key(ConsumerId, Consumers),
+ case messages_ready(State0) of
+ 0 ->
+ {State0, {dequeue, empty}};
+ _ when Exists ->
+ %% a dequeue using the same consumer_id isn't possible at this point
+ {State0, {dequeue, empty}};
+ Ready ->
+ State1 = update_consumer(ConsumerId, ConsumerMeta,
+ {once, 1, simple_prefetch},
+ State0),
+ {success, _, MsgId, Msg, State2} = checkout_one(State1),
+ {State, Effects} = case Settlement of
+ unsettled ->
+ {_, Pid} = ConsumerId,
+ {State2, [{monitor, process, Pid}]};
+ settled ->
+ %% immediately settle the checkout
+ {State3, _, Effects0} =
+ apply(Meta, make_settle(ConsumerId, [MsgId]),
+ State2),
+ {State3, Effects0}
+ end,
+ case Msg of
+ {RaftIdx, {Header, 'empty'}} ->
+ %% TODO add here new log effect with reply
+ {State, '$ra_no_reply',
+ reply_log_effect(RaftIdx, MsgId, Header, Ready - 1, From)};
+ _ ->
+ {State, {dequeue, {MsgId, Msg}, Ready-1}, Effects}
+ end
+ end;
+apply(Meta, #checkout{spec = cancel, consumer_id = ConsumerId}, State0) ->
+ {State, Effects} = cancel_consumer(ConsumerId, State0, [], consumer_cancel),
+ checkout(Meta, State, Effects);
+apply(Meta, #checkout{spec = Spec, meta = ConsumerMeta,
+ consumer_id = {_, Pid} = ConsumerId},
+ State0) ->
+ State1 = update_consumer(ConsumerId, ConsumerMeta, Spec, State0),
+ checkout(Meta, State1, [{monitor, process, Pid}]);
+apply(#{index := RaftIdx}, #purge{},
+ #?STATE{ra_indexes = Indexes0,
+ returns = Returns,
+ messages = Messages} = State0) ->
+ Total = messages_ready(State0),
+ Indexes1 = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes0,
+ [I || {I, _} <- lists:sort(maps:values(Messages))]),
+ Indexes = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes1,
+ [I || {_, {I, _}} <- lqueue:to_list(Returns)]),
+ {State, _, Effects} =
+ update_smallest_raft_index(RaftIdx,
+ State0#?STATE{ra_indexes = Indexes,
+ messages = #{},
+ returns = lqueue:new(),
+ msg_bytes_enqueue = 0,
+ prefix_msgs = {0, [], 0, []},
+ low_msg_num = undefined,
+ msg_bytes_in_memory = 0,
+ msgs_ready_in_memory = 0},
+ []),
+ %% as we're not checking out after a purge (no point) we have to
+ %% reverse the effects ourselves
+ {State, {purge, Total},
+ lists:reverse([garbage_collection | Effects])};
+apply(Meta, {down, Pid, noconnection},
+ #?STATE{consumers = Cons0,
+ cfg = #cfg{consumer_strategy = single_active},
+ waiting_consumers = Waiting0,
+ enqueuers = Enqs0} = State0) ->
+ Node = node(Pid),
+ %% if the pid refers to an active or cancelled consumer,
+ %% mark it as suspected and return it to the waiting queue
+ {State1, Effects0} =
+ maps:fold(fun({_, P} = Cid, C0, {S0, E0})
+ when node(P) =:= Node ->
+ %% the consumer should be returned to waiting
+ %% and checked out messages should be returned
+ Effs = consumer_update_active_effects(
+ S0, Cid, C0, false, suspected_down, E0),
+ Checked = C0#consumer.checked_out,
+ Credit = increase_credit(C0, maps:size(Checked)),
+ {St, Effs1} = return_all(S0, Effs,
+ Cid, C0#consumer{credit = Credit}),
+ %% if the consumer was cancelled there is a chance it got
+ %% removed when returning hence we need to be defensive here
+ Waiting = case St#?STATE.consumers of
+ #{Cid := C} ->
+ Waiting0 ++ [{Cid, C}];
+ _ ->
+ Waiting0
+ end,
+ {St#?STATE{consumers = maps:remove(Cid, St#?STATE.consumers),
+ waiting_consumers = Waiting},
+ Effs1};
+ (_, _, S) ->
+ S
+ end, {State0, []}, Cons0),
+ WaitingConsumers = update_waiting_consumer_status(Node, State1,
+ suspected_down),
+
+ %% select a new consumer from the waiting queue and run a checkout
+ State2 = State1#?STATE{waiting_consumers = WaitingConsumers},
+ {State, Effects1} = activate_next_consumer(State2, Effects0),
+
+    %% mark any enqueuers as suspected
+ Enqs = maps:map(fun(P, E) when node(P) =:= Node ->
+ E#enqueuer{status = suspected_down};
+ (_, E) -> E
+ end, Enqs0),
+ Effects = [{monitor, node, Node} | Effects1],
+ checkout(Meta, State#?STATE{enqueuers = Enqs}, Effects);
+apply(Meta, {down, Pid, noconnection},
+ #?STATE{consumers = Cons0,
+ enqueuers = Enqs0} = State0) ->
+ %% A node has been disconnected. This doesn't necessarily mean that
+ %% any processes on this node are down, they _may_ come back so here
+ %% we just mark them as suspected (effectively deactivated)
+ %% and return all checked out messages to the main queue for delivery to any
+ %% live consumers
+ %%
+ %% all pids for the disconnected node will be marked as suspected not just
+ %% the one we got the `down' command for
+ Node = node(Pid),
+
+ {State, Effects1} =
+ maps:fold(
+ fun({_, P} = Cid, #consumer{checked_out = Checked0,
+ status = up} = C0,
+ {St0, Eff}) when node(P) =:= Node ->
+ Credit = increase_credit(C0, map_size(Checked0)),
+ C = C0#consumer{status = suspected_down,
+ credit = Credit},
+ {St, Eff0} = return_all(St0, Eff, Cid, C),
+ Eff1 = consumer_update_active_effects(St, Cid, C, false,
+ suspected_down, Eff0),
+ {St, Eff1};
+ (_, _, {St, Eff}) ->
+ {St, Eff}
+ end, {State0, []}, Cons0),
+ Enqs = maps:map(fun(P, E) when node(P) =:= Node ->
+ E#enqueuer{status = suspected_down};
+ (_, E) -> E
+ end, Enqs0),
+
+ % Monitor the node so that we can "unsuspect" these processes when the node
+ % comes back, then re-issue all monitors and discover the final fate of
+ % these processes
+ Effects = case maps:size(State#?STATE.consumers) of
+ 0 ->
+ [{aux, inactive}, {monitor, node, Node}];
+ _ ->
+ [{monitor, node, Node}]
+ end ++ Effects1,
+ checkout(Meta, State#?STATE{enqueuers = Enqs}, Effects);
+apply(Meta, {down, Pid, _Info}, State0) ->
+ {State, Effects} = handle_down(Pid, State0),
+ checkout(Meta, State, Effects);
+apply(Meta, {nodeup, Node}, #?STATE{consumers = Cons0,
+ enqueuers = Enqs0,
+ service_queue = SQ0} = State0) ->
+ %% A node we are monitoring has come back.
+ %% If we have suspected any processes of being
+ %% down we should now re-issue the monitors for them to detect if they're
+ %% actually down or not
+ Monitors = [{monitor, process, P}
+ || P <- suspected_pids_for(Node, State0)],
+
+ Enqs1 = maps:map(fun(P, E) when node(P) =:= Node ->
+ E#enqueuer{status = up};
+ (_, E) -> E
+ end, Enqs0),
+ ConsumerUpdateActiveFun = consumer_active_flag_update_function(State0),
+ %% mark all consumers as up
+ {Cons1, SQ, Effects1} =
+ maps:fold(fun({_, P} = ConsumerId, C, {CAcc, SQAcc, EAcc})
+ when (node(P) =:= Node) and
+ (C#consumer.status =/= cancelled) ->
+ EAcc1 = ConsumerUpdateActiveFun(State0, ConsumerId,
+ C, true, up, EAcc),
+ update_or_remove_sub(ConsumerId,
+ C#consumer{status = up}, CAcc,
+ SQAcc, EAcc1);
+ (_, _, Acc) ->
+ Acc
+ end, {Cons0, SQ0, Monitors}, Cons0),
+ Waiting = update_waiting_consumer_status(Node, State0, up),
+ State1 = State0#?STATE{consumers = Cons1,
+ enqueuers = Enqs1,
+ service_queue = SQ,
+ waiting_consumers = Waiting},
+ {State, Effects} = activate_next_consumer(State1, Effects1),
+ checkout(Meta, State, Effects);
+apply(_, {nodedown, _Node}, State) ->
+ {State, ok};
+apply(_, #purge_nodes{nodes = Nodes}, State0) ->
+ {State, Effects} = lists:foldl(fun(Node, {S, E}) ->
+ purge_node(Node, S, E)
+ end, {State0, []}, Nodes),
+ {State, ok, Effects};
+apply(Meta, #update_config{config = Conf}, State) ->
+ checkout(Meta, update_config(Conf, State), []).
+
+%% Process a `down' for every pid known on Node, threading the state
+%% through and accumulating effects in order.
+purge_node(Node, State0, Effects0) ->
+    Pids = all_pids_for(Node, State0),
+    lists:foldl(
+      fun (Pid, {StateAcc, EffAcc}) ->
+              {StateAcc1, NewEffects} = handle_down(Pid, StateAcc),
+              {StateAcc1, EffAcc ++ NewEffects}
+      end, {State0, Effects0}, Pids).
+
+%% any downs that are not noconnection
+%% Removes the enqueuer for Pid (flushing its pending messages onto the
+%% queue), drops the pid from the waiting list and cancels any
+%% consumers registered under it.
+handle_down(Pid, #?STATE{consumers = Cons0,
+ enqueuers = Enqs0} = State0) ->
+ % Remove any enqueuer for the same pid and enqueue any pending messages
+ % This should be ok as we won't see any more enqueues from this pid
+ State1 = case maps:take(Pid, Enqs0) of
+ {#enqueuer{pending = Pend}, Enqs} ->
+ lists:foldl(fun ({_, RIdx, RawMsg}, S) ->
+ enqueue(RIdx, RawMsg, S)
+ end, State0#?STATE{enqueuers = Enqs}, Pend);
+ error ->
+ State0
+ end,
+ {Effects1, State2} = handle_waiting_consumer_down(Pid, State1),
+ % return checked out messages to main queue
+ % Find the consumers for the down pid
+ DownConsumers = maps:keys(
+ maps:filter(fun({_, P}, _) -> P =:= Pid end, Cons0)),
+ lists:foldl(fun(ConsumerId, {S, E}) ->
+ cancel_consumer(ConsumerId, S, E, down)
+ end, {State2, Effects1}, DownConsumers).
+
+%% Returns a 6-arity fun used to emit (or suppress) a per-consumer
+%% activity status change effect, depending on the consumer strategy.
+%% competing: every status change is notified directly; single_active:
+%% notifications are driven elsewhere, so this is a no-op.
+consumer_active_flag_update_function(#?STATE{cfg = #cfg{consumer_strategy = competing}}) ->
+    fun consumer_update_active_effects/6;
+consumer_active_flag_update_function(#?STATE{cfg = #cfg{consumer_strategy = single_active}}) ->
+    fun(_State, _ConsumerId, _Consumer, _Active, _ActivityStatus, Effects) ->
+            Effects
+    end.
+
+%% Removes waiting consumers registered under a downed pid (relevant to
+%% the single-active strategy only) and emits a cancel effect for each.
+handle_waiting_consumer_down(_Pid,
+                             #?STATE{cfg = #cfg{consumer_strategy = competing}} = State) ->
+    {[], State};
+handle_waiting_consumer_down(_Pid,
+                             #?STATE{cfg = #cfg{consumer_strategy = single_active},
+                                     waiting_consumers = []} = State) ->
+    {[], State};
+handle_waiting_consumer_down(Pid,
+                             #?STATE{cfg = #cfg{consumer_strategy = single_active},
+                                     waiting_consumers = Waiting0} = State0) ->
+    %% split the waiting list into consumers on the downed pid and the rest
+    {Down, StillUp} =
+        lists:partition(fun ({{_, P}, _}) -> P =:= Pid end, Waiting0),
+    %% emit a cancellation effect for every downed waiting consumer
+    Effects = lists:foldl(fun ({ConsumerId, _}, Acc) ->
+                                  cancel_consumer_effects(ConsumerId, State0,
+                                                          Acc)
+                          end, [], Down),
+    %% keep only the waiting consumers that are still alive
+    {Effects, State0#?STATE{waiting_consumers = StillUp}}.
+
+%% Sets Status on every waiting consumer located on Node, dropping
+%% cancelled consumers from the waiting list entirely.
+update_waiting_consumer_status(Node,
+                               #?STATE{waiting_consumers = Waiting},
+                               Status) ->
+    [case node(Pid) =:= Node of
+         true ->
+             {ConsumerId, Consumer#consumer{status = Status}};
+         false ->
+             {ConsumerId, Consumer}
+     end || {{_, Pid} = ConsumerId, Consumer} <- Waiting,
+            Consumer#consumer.status =/= cancelled].
+
+-spec state_enter(ra_server:ra_state(), state()) -> ra_machine:effects().
+%% Effects emitted when the ra server transitions raft state.
+state_enter(leader, #?STATE{consumers = Cons,
+                            enqueuers = Enqs,
+                            waiting_consumers = WaitingConsumers,
+                            cfg = #cfg{name = Name,
+                                       resource = Resource,
+                                       become_leader_handler = BLH},
+                            prefix_msgs = {0, [], 0, []}
+                           }) ->
+    %% NOTE(review): this clause matches only when prefix_msgs is empty;
+    %% a leader with pending prefix messages falls through to the
+    %% catch-all below and emits no monitor effects - confirm intended.
+    % return effects to monitor all current consumers and enqueuers
+    Pids = lists:usort(maps:keys(Enqs)
+                       ++ [P || {_, P} <- maps:keys(Cons)]
+                       ++ [P || {{_, P}, _} <- WaitingConsumers]),
+    Mons = [{monitor, process, P} || P <- Pids],
+    Nots = [{send_msg, P, leader_change, ra_event} || P <- Pids],
+    NodeMons = lists:usort([{monitor, node, node(P)} || P <- Pids]),
+    FHReservation = [{mod_call, rabbit_quorum_queue,
+                      file_handle_leader_reservation, [Resource]}],
+    Effects = Mons ++ Nots ++ NodeMons ++ FHReservation,
+    case BLH of
+        undefined ->
+            Effects;
+        {Mod, Fun, Args} ->
+            [{mod_call, Mod, Fun, Args ++ [Name]} | Effects]
+    end;
+state_enter(eol, #?STATE{enqueuers = Enqs,
+                         consumers = Custs0,
+                         waiting_consumers = WaitingConsumers0}) ->
+    %% queue deleted: notify every known pid, release file handle
+    %% reservation
+    Custs = maps:fold(fun({_, P}, V, S) -> S#{P => V} end, #{}, Custs0),
+    WaitingConsumers1 = lists:foldl(fun({{_, P}, V}, Acc) -> Acc#{P => V} end,
+                                    #{}, WaitingConsumers0),
+    AllConsumers = maps:merge(Custs, WaitingConsumers1),
+    [{send_msg, P, eol, ra_event}
+     || P <- maps:keys(maps:merge(Enqs, AllConsumers))] ++
+    [{mod_call, rabbit_quorum_queue, file_handle_release_reservation, []}];
+state_enter(State, #?STATE{cfg = #cfg{resource = _Resource}}) when State =/= leader ->
+    FHReservation = {mod_call, rabbit_quorum_queue, file_handle_other_reservation, []},
+    [FHReservation];
+state_enter(_, _) ->
+    %% catch all as not handling all states
+    [].
+
+
+-spec tick(non_neg_integer(), state()) -> ra_machine:effects().
+%% Periodic effect: pushes queue metrics to rabbit_quorum_queue.
+tick(_Ts, #?STATE{cfg = #cfg{name = Name,
+ resource = QName},
+ msg_bytes_enqueue = EnqueueBytes,
+ msg_bytes_checkout = CheckoutBytes} = State) ->
+ Metrics = {Name,
+ messages_ready(State),
+ num_checked_out(State), % checked out
+ messages_total(State),
+ query_consumer_count(State), % Consumers
+ EnqueueBytes,
+ CheckoutBytes},
+ [{mod_call, rabbit_quorum_queue,
+ handle_tick, [QName, Metrics, all_nodes(State)]}].
+
+-spec overview(state()) -> map().
+%% Summary map of configuration and counters for introspection tools.
+overview(#?STATE{consumers = Cons,
+ enqueuers = Enqs,
+ release_cursors = Cursors,
+ enqueue_count = EnqCount,
+ msg_bytes_enqueue = EnqueueBytes,
+ msg_bytes_checkout = CheckoutBytes,
+ cfg = Cfg} = State) ->
+ Conf = #{name => Cfg#cfg.name,
+ resource => Cfg#cfg.resource,
+ release_cursor_interval => Cfg#cfg.release_cursor_interval,
+ dead_lettering_enabled => undefined =/= Cfg#cfg.dead_letter_handler,
+ max_length => Cfg#cfg.max_length,
+ max_bytes => Cfg#cfg.max_bytes,
+ consumer_strategy => Cfg#cfg.consumer_strategy,
+ max_in_memory_length => Cfg#cfg.max_in_memory_length,
+ max_in_memory_bytes => Cfg#cfg.max_in_memory_bytes},
+ #{type => ?MODULE,
+ config => Conf,
+ num_consumers => maps:size(Cons),
+ num_checked_out => num_checked_out(State),
+ num_enqueuers => maps:size(Enqs),
+ num_ready_messages => messages_ready(State),
+ num_messages => messages_total(State),
+ num_release_cursors => lqueue:len(Cursors),
+ %% NOTE(review): "crusor" is a typo but this key is part of the
+ %% externally observable map; renaming it would break readers of
+ %% overview/1, so it is kept as is
+ release_crusor_enqueue_counter => EnqCount,
+ enqueue_message_bytes => EnqueueBytes,
+ checkout_message_bytes => CheckoutBytes}.
+
+-spec get_checked_out(consumer_id(), msg_id(), msg_id(), state()) ->
+ [delivery_msg()].
+%% Returns the {MsgId, Msg} pairs checked out to Cid within the
+%% inclusive msg id range From..To (ascending); [] for unknown Cid.
+get_checked_out(Cid, From, To, #?STATE{consumers = Consumers}) ->
+ case Consumers of
+ #{Cid := #consumer{checked_out = Checked}} ->
+ [{K, snd(snd(maps:get(K, Checked)))}
+ || K <- lists:seq(From, To),
+ maps:is_key(K, Checked)];
+ _ ->
+ []
+ end.
+
+%% auxiliary (non-replicated, per-server) state
+-record(aux_gc, {last_raft_idx = 0 :: ra:index()}).
+-record(aux, {name :: atom(),
+ utilisation :: term(),
+ gc = #aux_gc{} :: #aux_gc{}}).
+
+%% Initialise aux state and ensure the shared usage ets table exists.
+init_aux(Name) when is_atom(Name) ->
+ %% TODO: catch specific exception throw if table already exists
+ ok = ra_machine_ets:create_table(rabbit_fifo_usage,
+ [named_table, set, public,
+ {write_concurrency, true}]),
+ Now = erlang:monotonic_time(micro_seconds),
+ #aux{name = Name,
+ utilisation = {inactive, Now, 1, 1.0}}.
+
+%% Aux commands: active/inactive toggle utilisation tracking, tick
+%% publishes usage and possibly GCs, eval is a no-op.
+handle_aux(_RaState, cast, Cmd, #aux{name = Name,
+ utilisation = Use0} = State0,
+ Log, MacState) ->
+ State = case Cmd of
+ _ when Cmd == active orelse Cmd == inactive ->
+ State0#aux{utilisation = update_use(Use0, Cmd)};
+ tick ->
+ true = ets:insert(rabbit_fifo_usage,
+ {Name, utilisation(Use0)}),
+ eval_gc(Log, MacState, State0);
+ eval ->
+ State0
+ end,
+ {no_reply, State, Log}.
+
+%% Force a full GC of this process when the queue is empty, process
+%% memory exceeds ?GC_MEM_LIMIT_B and we have not already collected at
+%% this raft index.
+eval_gc(Log, #?STATE{cfg = #cfg{resource = QR}} = MacState,
+ #aux{gc = #aux_gc{last_raft_idx = LastGcIdx} = Gc} = AuxState) ->
+ {Idx, _} = ra_log:last_index_term(Log),
+ {memory, Mem} = erlang:process_info(self(), memory),
+ case messages_total(MacState) of
+ 0 when Idx > LastGcIdx andalso
+ Mem > ?GC_MEM_LIMIT_B ->
+ garbage_collect(),
+ {memory, MemAfter} = erlang:process_info(self(), memory),
+ rabbit_log:debug("~s: full GC sweep complete. "
+ "Process memory changed from ~.2fMB to ~.2fMB.",
+ [rabbit_misc:rs(QR), Mem/?MB, MemAfter/?MB]),
+ AuxState#aux{gc = Gc#aux_gc{last_raft_idx = Idx}};
+ _ ->
+ AuxState
+ end.
+
+%%% Queries
+
+%% Number of messages ready for delivery.
+query_messages_ready(State) ->
+ messages_ready(State).
+
+%% Total number of messages currently checked out across all consumers.
+query_messages_checked_out(#?STATE{consumers = Consumers}) ->
+    lists:sum([maps:size(Checked)
+               || #consumer{checked_out = Checked}
+                      <- maps:values(Consumers)]).
+
+%% Total number of messages known to the queue (ready + checked out).
+query_messages_total(State) ->
+ messages_total(State).
+
+%% Distinct pids of all known enqueuers and consumers.
+query_processes(#?STATE{enqueuers = Enqs, consumers = Cons0}) ->
+    %% enqueuers are keyed by pid already; re-key consumers by pid so a
+    %% map merge yields the unique pid set
+    Cons = maps:from_list([{P, V} || {{_, P}, V} <- maps:to_list(Cons0)]),
+    maps:keys(maps:merge(Enqs, Cons)).
+
+
+%% Raw index of raft indexes currently retained by the queue.
+query_ra_indexes(#?STATE{ra_indexes = RaIndexes}) ->
+ RaIndexes.
+
+%% Count of all consumers, active and waiting.
+query_consumer_count(#?STATE{consumers = Consumers,
+ waiting_consumers = WaitingConsumers}) ->
+ maps:size(Consumers) + length(WaitingConsumers).
+
+%% Returns a map of consumer id => consumer info tuple covering both
+%% active and waiting consumers; cancelled consumers are excluded.
+query_consumers(#?STATE{consumers = Consumers,
+                        waiting_consumers = WaitingConsumers,
+                        cfg = #cfg{consumer_strategy = ConsumerStrategy}} = State) ->
+    %% how the Active flag and activity status are derived depends on
+    %% the consumer strategy
+    ActiveActivityStatusFun =
+        case ConsumerStrategy of
+            competing ->
+                %% competing: every non-suspected consumer is active
+                fun(_ConsumerId,
+                    #consumer{status = Status}) ->
+                        case Status of
+                            suspected_down ->
+                                {false, Status};
+                            _ ->
+                                {true, Status}
+                        end
+                end;
+            single_active ->
+                %% single active: only the elected consumer is active
+                SingleActiveConsumer = query_single_active_consumer(State),
+                fun({Tag, Pid} = _Consumer, _) ->
+                        case SingleActiveConsumer of
+                            {value, {Tag, Pid}} ->
+                                {true, single_active};
+                            _ ->
+                                {false, waiting}
+                        end
+                end
+        end,
+    FromConsumers =
+        maps:fold(fun (_, #consumer{status = cancelled}, Acc) ->
+                          Acc;
+                      (ConsumerId, Consumer, Acc) ->
+                          add_consumer_info(ConsumerId, Consumer,
+                                            ActiveActivityStatusFun, Acc)
+                  end, #{}, Consumers),
+    %% merged second, so on an id clash the waiting entry wins, which
+    %% matches the original maps:merge/2 ordering
+    FromWaitingConsumers =
+        lists:foldl(fun ({_, #consumer{status = cancelled}}, Acc) ->
+                            Acc;
+                        ({ConsumerId, Consumer}, Acc) ->
+                            add_consumer_info(ConsumerId, Consumer,
+                                              ActiveActivityStatusFun, Acc)
+                    end, #{}, WaitingConsumers),
+    maps:merge(FromConsumers, FromWaitingConsumers).
+
+%% Build the externally visible consumer info tuple for one consumer
+%% and add it to the accumulator map (shared by query_consumers/1).
+add_consumer_info({Tag, Pid} = ConsumerId, #consumer{meta = Meta} = Consumer,
+                  ActiveActivityStatusFun, Acc) ->
+    {Active, ActivityStatus} = ActiveActivityStatusFun(ConsumerId, Consumer),
+    Acc#{ConsumerId => {Pid, Tag,
+                        maps:get(ack, Meta, undefined),
+                        maps:get(prefetch, Meta, undefined),
+                        Active,
+                        ActivityStatus,
+                        maps:get(args, Meta, []),
+                        maps:get(username, Meta, undefined)}}.
+
+%% Returns the single active consumer id when the single-active
+%% strategy is enabled; `disabled' otherwise.
+query_single_active_consumer(
+  #?STATE{cfg = #cfg{consumer_strategy = single_active},
+          consumers = Consumers}) ->
+    case maps:size(Consumers) of
+        0 ->
+            {error, no_value};
+        1 ->
+            %% the only key in the map is the active consumer id
+            {value, hd(maps:keys(Consumers))};
+        _ ->
+            %% under single-active at most one consumer should ever be
+            %% in the active consumers map
+            {error, illegal_size}
+    end;
+query_single_active_consumer(_) ->
+    disabled.
+
+%% {ReadyCount, ConsumerCount} snapshot used for stats emission.
+query_stat(#?STATE{consumers = Consumers} = State) ->
+ {messages_ready(State), maps:size(Consumers)}.
+
+%% {Length, Bytes} of ready messages whose bodies are held in memory.
+query_in_memory_usage(#?STATE{msg_bytes_in_memory = Bytes,
+ msgs_ready_in_memory = Length}) ->
+ {Length, Bytes}.
+
+-spec usage(atom()) -> float().
+%% Read the last recorded utilisation for the named queue from the
+%% shared ets table; 0.0 when nothing has been recorded yet.
+usage(Name) when is_atom(Name) ->
+    case ets:lookup(rabbit_fifo_usage, Name) of
+        [{_Name, Use}] -> Use;
+        [] -> 0.0
+    end.
+
+%%% Internal
+
+%% Messages ready for delivery: main queue + return queue + prefix
+%% message counters (messages carried over from a state conversion).
+messages_ready(#?STATE{messages = M,
+ prefix_msgs = {RCnt, _R, PCnt, _P},
+ returns = R}) ->
+
+ %% prefix message counts are tracked as plain integers (RCnt/PCnt)
+ %% so no list traversal is needed here
+ maps:size(M) + lqueue:len(R) + RCnt + PCnt.
+
+%% Total message count, including checked-out messages.
+messages_total(#?STATE{ra_indexes = I,
+ prefix_msgs = {RCnt, _R, PCnt, _P}}) ->
+ rabbit_fifo_index:size(I) + RCnt + PCnt.
+
+%% Utilisation tracking state transitions. The two shapes are
+%% {active, Since, Avg} and {inactive, Since, ActiveDuration, Avg}.
+update_use({inactive, _, _, _} = CUInfo, inactive) ->
+ CUInfo;
+update_use({active, _, _} = CUInfo, active) ->
+ CUInfo;
+update_use({active, Since, Avg}, inactive) ->
+ Now = erlang:monotonic_time(micro_seconds),
+ {inactive, Now, Now - Since, Avg};
+update_use({inactive, Since, Active, Avg}, active) ->
+ Now = erlang:monotonic_time(micro_seconds),
+ {active, Now, use_avg(Active, Now - Since, Avg)}.
+
+%% Current utilisation estimate, folding in the time elapsed since the
+%% last state transition.
+utilisation({active, Since, Avg}) ->
+ use_avg(erlang:monotonic_time(micro_seconds) - Since, 0, Avg);
+utilisation({inactive, Since, Active, Avg}) ->
+ use_avg(Active, erlang:monotonic_time(micro_seconds) - Since, Avg).
+
+%% Fold an active/inactive duration pair into the moving average.
+use_avg(0, 0, Avg) ->
+ Avg;
+use_avg(Active, Inactive, Avg) ->
+ Time = Inactive + Active,
+ moving_average(Time, ?USE_AVG_HALF_LIFE, Active / Time, Avg).
+
+%% Exponentially weighted moving average with the given half life; an
+%% undefined current average is seeded with the new sample.
+moving_average(_Time, _HalfLife, Next, undefined) ->
+    Next;
+moving_average(Time, HalfLife, Next, Current) ->
+    %% weight of the old average decays by half every HalfLife units
+    Weight = math:exp(Time * math:log(0.5) / HalfLife),
+    Current * Weight + Next * (1 - Weight).
+
+%% Total checked-out message count across all consumers.
+num_checked_out(#?STATE{consumers = Cons}) ->
+    lists:sum([maps:size(Checked)
+               || #consumer{checked_out = Checked} <- maps:values(Cons)]).
+
+%% Cancel a consumer, taking the consumer strategy into account; under
+%% single-active a waiting consumer may be promoted to active.
+cancel_consumer(ConsumerId,
+ #?STATE{cfg = #cfg{consumer_strategy = competing}} = State,
+ Effects, Reason) ->
+ cancel_consumer0(ConsumerId, State, Effects, Reason);
+cancel_consumer(ConsumerId,
+ #?STATE{cfg = #cfg{consumer_strategy = single_active},
+ waiting_consumers = []} = State,
+ Effects, Reason) ->
+ %% single active consumer on, no consumers are waiting
+ cancel_consumer0(ConsumerId, State, Effects, Reason);
+cancel_consumer(ConsumerId,
+ #?STATE{consumers = Cons0,
+ cfg = #cfg{consumer_strategy = single_active},
+ waiting_consumers = Waiting0} = State0,
+ Effects0, Reason) ->
+ %% single active consumer on, consumers are waiting
+ case maps:is_key(ConsumerId, Cons0) of
+ true ->
+ % The active consumer is to be removed
+ {State1, Effects1} = cancel_consumer0(ConsumerId, State0,
+ Effects0, Reason),
+ activate_next_consumer(State1, Effects1);
+ false ->
+ % The consumer being cancelled is not the active one;
+ % just remove it from the waiting list
+ Waiting = lists:keydelete(ConsumerId, 1, Waiting0),
+ Effects = cancel_consumer_effects(ConsumerId, State0, Effects0),
+ % A waiting consumer isn't supposed to have any checked out messages,
+ % so nothing special to do here
+ {State0#?STATE{waiting_consumers = Waiting}, Effects}
+ end.
+
+%% Effect that notifies rabbit_quorum_queue of a consumer activity
+%% status change.
+consumer_update_active_effects(#?STATE{cfg = #cfg{resource = QName}},
+ ConsumerId, #consumer{meta = Meta},
+ Active, ActivityStatus,
+ Effects) ->
+ Ack = maps:get(ack, Meta, undefined),
+ Prefetch = maps:get(prefetch, Meta, undefined),
+ Args = maps:get(args, Meta, []),
+ [{mod_call, rabbit_quorum_queue, update_consumer_handler,
+ [QName, ConsumerId, false, Ack, Prefetch, Active, ActivityStatus, Args]}
+ | Effects].
+
+%% Strategy-agnostic consumer cancellation: return or settle the
+%% consumer's messages as dictated by Reason (via maybe_return_all) and
+%% emit the cancellation effects.
+cancel_consumer0(ConsumerId, #?STATE{consumers = C0} = S0, Effects0, Reason) ->
+ case C0 of
+ #{ConsumerId := Consumer} ->
+ {S, Effects2} = maybe_return_all(ConsumerId, Consumer, S0,
+ Effects0, Reason),
+ %% The effects are emitted before the consumer is actually removed
+ %% if the consumer has unacked messages. This is a bit weird but
+ %% in line with what classic queues do (from an external point of
+ %% view)
+ Effects = cancel_consumer_effects(ConsumerId, S, Effects2),
+ case maps:size(S#?STATE.consumers) of
+ 0 ->
+ {S, [{aux, inactive} | Effects]};
+ _ ->
+ {S, Effects}
+ end;
+ _ ->
+ %% already removed: do nothing
+ {S0, Effects0}
+ end.
+
+%% If no consumer in the consumers map is up, promote the first up
+%% consumer from the waiting list (single-active strategy).
+activate_next_consumer(#?STATE{consumers = Cons,
+ waiting_consumers = Waiting0} = State0,
+ Effects0) ->
+ case maps:filter(fun (_, #consumer{status = S}) -> S == up end, Cons) of
+ Up when map_size(Up) == 0 ->
+ %% there are no active consumer in the consumer map
+ case lists:filter(fun ({_, #consumer{status = Status}}) ->
+ Status == up
+ end, Waiting0) of
+ [{NextConsumerId, NextConsumer} | _] ->
+ %% there is a potential next active consumer
+ Remaining = lists:keydelete(NextConsumerId, 1, Waiting0),
+ #?STATE{service_queue = ServiceQueue} = State0,
+ ServiceQueue1 = maybe_queue_consumer(NextConsumerId,
+ NextConsumer,
+ ServiceQueue),
+ State = State0#?STATE{consumers = Cons#{NextConsumerId => NextConsumer},
+ service_queue = ServiceQueue1,
+ waiting_consumers = Remaining},
+ Effects = consumer_update_active_effects(State, NextConsumerId,
+ NextConsumer, true,
+ single_active, Effects0),
+ {State, Effects};
+ [] ->
+ {State0, [{aux, inactive} | Effects0]}
+ end;
+ _ ->
+ {State0, Effects0}
+ end.
+
+
+
+%% On consumer_cancel: keep the consumer around (status cancelled) so
+%% pending acks can still be settled; on down: return all checked out
+%% messages and remove the consumer entirely.
+maybe_return_all(ConsumerId, Consumer,
+ #?STATE{consumers = C0,
+ service_queue = SQ0} = S0,
+ Effects0, Reason) ->
+ case Reason of
+ consumer_cancel ->
+ {Cons, SQ, Effects1} =
+ update_or_remove_sub(ConsumerId,
+ Consumer#consumer{lifetime = once,
+ credit = 0,
+ status = cancelled},
+ C0, SQ0, Effects0),
+ {S0#?STATE{consumers = Cons,
+ service_queue = SQ}, Effects1};
+ down ->
+ {S1, Effects1} = return_all(S0, Effects0, ConsumerId, Consumer),
+ {S1#?STATE{consumers = maps:remove(ConsumerId, S1#?STATE.consumers)},
+ Effects1}
+ end.
+
+%% Enqueue a raw message then run a checkout; a dehydrated state
+%% snapshot is stashed at release-cursor intervals.
+apply_enqueue(#{index := RaftIdx} = Meta, From, Seq, RawMsg, State0) ->
+ case maybe_enqueue(RaftIdx, From, Seq, RawMsg, [], State0) of
+ {ok, State1, Effects1} ->
+ State2 = append_to_master_index(RaftIdx, State1),
+ {State, ok, Effects} = checkout(Meta, State2, Effects1),
+ {maybe_store_dehydrated_state(RaftIdx, State), ok, Effects};
+ {duplicate, State, Effects} ->
+ {State, ok, Effects}
+ end.
+
+%% Drop the message at the head of the queue (max-length overflow),
+%% adjusting byte counters and emitting dead-letter effects when a
+%% handler is configured.
+drop_head(#?STATE{ra_indexes = Indexes0} = State0, Effects0) ->
+ case take_next_msg(State0) of
+ {FullMsg = {_MsgId, {RaftIdxToDrop, {Header, Msg}}},
+ State1} ->
+ Indexes = rabbit_fifo_index:delete(RaftIdxToDrop, Indexes0),
+ State2 = add_bytes_drop(Header, State1#?STATE{ra_indexes = Indexes}),
+ State = case Msg of
+ 'empty' -> State2;
+ _ -> subtract_in_memory_counts(Header, State2)
+ end,
+ Effects = dead_letter_effects(maxlen, #{none => FullMsg},
+ State, Effects0),
+ {State, Effects};
+ {{'$prefix_msg', Header}, State1} ->
+ State2 = subtract_in_memory_counts(Header, add_bytes_drop(Header, State1)),
+ {State2, Effects0};
+ {{'$empty_msg', Header}, State1} ->
+ State2 = add_bytes_drop(Header, State1),
+ {State2, Effects0};
+ empty ->
+ {State0, Effects0}
+ end.
+
+%% Append a message to the main queue, keeping the body in memory only
+%% when the in-memory limits allow it ('empty' placeholder otherwise).
+enqueue(RaftIdx, RawMsg, #?STATE{messages = Messages,
+ low_msg_num = LowMsgNum,
+ next_msg_num = NextMsgNum} = State0) ->
+ %% the initial header is an integer only - it will get expanded to a map
+ %% when the next required key is added
+ Header = message_size(RawMsg),
+ {State1, Msg} =
+ case evaluate_memory_limit(Header, State0) of
+ true ->
+ % indexed message with header map
+ {State0, {RaftIdx, {Header, 'empty'}}};
+ false ->
+ {add_in_memory_counts(Header, State0),
+ {RaftIdx, {Header, RawMsg}}} % indexed message with header map
+ end,
+ State = add_bytes_enqueue(Header, State1),
+ State#?STATE{messages = Messages#{NextMsgNum => Msg},
+ %% this is probably only done to record it when low_msg_num
+ %% is undefined
+ low_msg_num = min(LowMsgNum, NextMsgNum),
+ next_msg_num = NextMsgNum + 1}.
+
+%% Record the raft index of a newly applied enqueue and bump the
+%% enqueue counter.
+append_to_master_index(RaftIdx,
+ #?STATE{ra_indexes = Indexes0} = State0) ->
+ State = incr_enqueue_count(State0),
+ Indexes = rabbit_fifo_index:append(RaftIdx, Indexes0),
+ State#?STATE{ra_indexes = Indexes}.
+
+
+%% Bump the enqueue counter, wrapping to 0 when the release cursor
+%% interval is reached (signals that a dehydrated snapshot is due).
+incr_enqueue_count(#?STATE{enqueue_count = C,
+ cfg = #cfg{release_cursor_interval = {_Base, C}}
+ } = State0) ->
+ %% this will trigger a dehydrated version of the state to be stored
+ %% at this raft index for potential future snapshot generation
+ %% Q: Why don't we just stash the release cursor here?
+ %% A: Because it needs to be the very last thing we do and we
+ %% first needs to run the checkout logic.
+ State0#?STATE{enqueue_count = 0};
+incr_enqueue_count(#?STATE{cfg = #cfg{release_cursor_interval = C} = Cfg}
+ = State0)
+ when is_integer(C) ->
+ %% conversion to new release cursor interval format
+ State = State0#?STATE{cfg = Cfg#cfg{release_cursor_interval = {C, C}}},
+ incr_enqueue_count(State);
+incr_enqueue_count(#?STATE{enqueue_count = C} = State) ->
+ State#?STATE{enqueue_count = C + 1}.
+
+%% When the enqueue counter has wrapped to 0, stash a dehydrated copy
+%% of the state keyed by raft index for later snapshot generation.
+maybe_store_dehydrated_state(RaftIdx,
+ #?STATE{cfg =
+ #cfg{release_cursor_interval = {Base, _}}
+ = Cfg,
+ ra_indexes = Indexes,
+ enqueue_count = 0,
+ release_cursors = Cursors0} = State0) ->
+ case rabbit_fifo_index:exists(RaftIdx, Indexes) of
+ false ->
+ %% the incoming enqueue must already have been dropped
+ State0;
+ true ->
+ Interval = case Base of
+ 0 -> 0;
+ _ ->
+ Total = messages_total(State0),
+ min(max(Total, Base),
+ ?RELEASE_CURSOR_EVERY_MAX)
+ end,
+ State = convert_prefix_msgs(
+ State0#?STATE{cfg = Cfg#cfg{release_cursor_interval =
+ {Base, Interval}}}),
+ Dehydrated = dehydrate_state(State),
+ Cursor = {release_cursor, RaftIdx, Dehydrated},
+ Cursors = lqueue:in(Cursor, Cursors0),
+ State#?STATE{release_cursors = Cursors}
+ end;
+maybe_store_dehydrated_state(RaftIdx,
+ #?STATE{cfg =
+ #cfg{release_cursor_interval = C} = Cfg}
+ = State0)
+ when is_integer(C) ->
+ %% convert to new format
+ State = State0#?STATE{cfg = Cfg#cfg{release_cursor_interval = {C, C}}},
+ maybe_store_dehydrated_state(RaftIdx, State);
+maybe_store_dehydrated_state(_RaftIdx, State) ->
+ State.
+
+%% Drain an enqueuer's pending list while its messages are in sequence.
+enqueue_pending(From,
+ #enqueuer{next_seqno = Next,
+ pending = [{Next, RaftIdx, RawMsg} | Pending]} = Enq0,
+ State0) ->
+ State = enqueue(RaftIdx, RawMsg, State0),
+ Enq = Enq0#enqueuer{next_seqno = Next + 1, pending = Pending},
+ enqueue_pending(From, Enq, State);
+enqueue_pending(From, Enq, #?STATE{enqueuers = Enqueuers0} = State) ->
+ State#?STATE{enqueuers = Enqueuers0#{From => Enq}}.
+
+%% Enqueue with deduplication: tracked enqueuers deliver in seqno
+%% order; out-of-order messages are parked, duplicates rejected.
+maybe_enqueue(RaftIdx, undefined, undefined, RawMsg, Effects, State0) ->
+ % direct enqueue without tracking
+ State = enqueue(RaftIdx, RawMsg, State0),
+ {ok, State, Effects};
+maybe_enqueue(RaftIdx, From, MsgSeqNo, RawMsg, Effects0,
+ #?STATE{enqueuers = Enqueuers0} = State0) ->
+ case maps:get(From, Enqueuers0, undefined) of
+ undefined ->
+ State1 = State0#?STATE{enqueuers = Enqueuers0#{From => #enqueuer{}}},
+ {ok, State, Effects} = maybe_enqueue(RaftIdx, From, MsgSeqNo,
+ RawMsg, Effects0, State1),
+ {ok, State, [{monitor, process, From} | Effects]};
+ #enqueuer{next_seqno = MsgSeqNo} = Enq0 ->
+ % it is the next expected seqno
+ State1 = enqueue(RaftIdx, RawMsg, State0),
+ Enq = Enq0#enqueuer{next_seqno = MsgSeqNo + 1},
+ State = enqueue_pending(From, Enq, State1),
+ {ok, State, Effects0};
+ #enqueuer{next_seqno = Next,
+ pending = Pending0} = Enq0
+ when MsgSeqNo > Next ->
+ % out of order delivery
+ Pending = [{MsgSeqNo, RaftIdx, RawMsg} | Pending0],
+ Enq = Enq0#enqueuer{pending = lists:sort(Pending)},
+ {ok, State0#?STATE{enqueuers = Enqueuers0#{From => Enq}}, Effects0};
+ #enqueuer{next_seqno = Next} when MsgSeqNo =< Next ->
+ % duplicate delivery - remove the raft index from the ra_indexes
+ % map as it was added earlier
+ {duplicate, State0, Effects0}
+ end.
+
+%% Second element of a tuple (arity >= 2).
+snd(Tuple) ->
+    element(2, Tuple).
+
+%% Return a set of checked-out messages from a consumer back onto the
+%% queue, top the consumer's credit back up, then run a checkout.
+return(#{index := IncomingRaftIdx} = Meta, ConsumerId, Returned,
+ Effects0, #?STATE{service_queue = SQ0} = State0) ->
+ {State1, Effects1} = maps:fold(
+ fun(MsgId, {Tag, _} = Msg, {S0, E0})
+ when Tag == '$prefix_msg';
+ Tag == '$empty_msg'->
+ return_one(MsgId, 0, Msg, S0, E0, ConsumerId);
+ (MsgId, {MsgNum, Msg}, {S0, E0}) ->
+ return_one(MsgId, MsgNum, Msg, S0, E0,
+ ConsumerId)
+ end, {State0, Effects0}, Returned),
+ {State2, Effects3} =
+ case State1#?STATE.consumers of
+ #{ConsumerId := Con0} = Cons0 ->
+ Con = Con0#consumer{credit = increase_credit(Con0,
+ map_size(Returned))},
+ {Cons, SQ, Effects2} = update_or_remove_sub(ConsumerId, Con,
+ Cons0, SQ0, Effects1),
+ {State1#?STATE{consumers = Cons,
+ service_queue = SQ}, Effects2};
+ _ ->
+ {State1, Effects1}
+ end,
+ {State, ok, Effects} = checkout(Meta, State2, Effects3),
+ update_smallest_raft_index(IncomingRaftIdx, State, Effects).
+
+% used to process messages that are finished
+complete(ConsumerId, Discarded,
+ #consumer{checked_out = Checked} = Con0, Effects0,
+ #?STATE{consumers = Cons0, service_queue = SQ0,
+ ra_indexes = Indexes0} = State0) ->
+ %% TODO optimise use of Discarded map here
+ MsgRaftIdxs = [RIdx || {_, {RIdx, _}} <- maps:values(Discarded)],
+ %% credit_mode = simple_prefetch should automatically top-up credit
+ %% as messages are simple_prefetch or otherwise returned
+ Con = Con0#consumer{checked_out = maps:without(maps:keys(Discarded), Checked),
+ credit = increase_credit(Con0, map_size(Discarded))},
+ {Cons, SQ, Effects} = update_or_remove_sub(ConsumerId, Con, Cons0,
+ SQ0, Effects0),
+ Indexes = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes0,
+ MsgRaftIdxs),
+ %% TODO: use maps:fold instead
+ State1 = lists:foldl(fun({_, {_, {Header, _}}}, Acc) ->
+ add_bytes_settle(Header, Acc);
+ ({'$prefix_msg', Header}, Acc) ->
+ add_bytes_settle(Header, Acc);
+ ({'$empty_msg', Header}, Acc) ->
+ add_bytes_settle(Header, Acc)
+ end, State0, maps:values(Discarded)),
+ {State1#?STATE{consumers = Cons,
+ ra_indexes = Indexes,
+ service_queue = SQ}, Effects}.
+
+%% Credit top-up rules, per consumer lifetime and credit mode.
+increase_credit(#consumer{lifetime = once,
+ credit = Credit}, _) ->
+ %% once consumers cannot increment credit
+ Credit;
+increase_credit(#consumer{lifetime = auto,
+ credit_mode = credited,
+ credit = Credit}, _) ->
+ %% credit_mode: credit also doesn't automatically increment credit
+ Credit;
+increase_credit(#consumer{credit = Current}, Credit) ->
+ Current + Credit.
+
+%% Settle the given MsgIds for a consumer, then run a checkout.
+complete_and_checkout(#{index := IncomingRaftIdx} = Meta, MsgIds, ConsumerId,
+ #consumer{checked_out = Checked0} = Con0,
+ Effects0, State0) ->
+ Discarded = maps:with(MsgIds, Checked0),
+ {State2, Effects1} = complete(ConsumerId, Discarded, Con0,
+ Effects0, State0),
+ {State, ok, Effects} = checkout(Meta, State2, Effects1),
+ update_smallest_raft_index(IncomingRaftIdx, State, Effects).
+
+%% Emit a {log, ...} effect that reads raw message bodies for
+%% dead-lettered entries stored as 'empty' (in-memory bodies are used
+%% directly) and hands them to the configured dead letter handler.
+%% No-op when no handler is configured.
+dead_letter_effects(_Reason, _Discarded,
+ #?STATE{cfg = #cfg{dead_letter_handler = undefined}},
+ Effects) ->
+ Effects;
+dead_letter_effects(Reason, Discarded,
+ #?STATE{cfg = #cfg{dead_letter_handler = {Mod, Fun, Args}}},
+ Effects) ->
+ RaftIdxs = maps:fold(
+ fun (_, {_, {RaftIdx, {_Header, 'empty'}}}, Acc) ->
+ [RaftIdx | Acc];
+ (_, _, Acc) ->
+ Acc
+ end, [], Discarded),
+ [{log, RaftIdxs,
+ fun (Log) ->
+ Lookup = maps:from_list(lists:zip(RaftIdxs, Log)),
+ DeadLetters = maps:fold(
+ fun (_, {_, {RaftIdx, {_Header, 'empty'}}}, Acc) ->
+ {enqueue, _, _, Msg} = maps:get(RaftIdx, Lookup),
+ [{Reason, Msg} | Acc];
+ (_, {_, {_, {_Header, Msg}}}, Acc) ->
+ [{Reason, Msg} | Acc];
+ (_, _, Acc) ->
+ Acc
+ end, [], Discarded),
+ [{mod_call, Mod, Fun, Args ++ [DeadLetters]}]
+ end} | Effects].
+
+%% Effect notifying rabbit_quorum_queue that a consumer was cancelled.
+cancel_consumer_effects(ConsumerId,
+ #?STATE{cfg = #cfg{resource = QName}}, Effects) ->
+ [{mod_call, rabbit_quorum_queue,
+ cancel_consumer_handler, [QName, ConsumerId]} | Effects].
+
+%% Possibly emit a release_cursor effect allowing the ra log to be
+%% truncated up to the smallest raft index still needed by the queue.
+update_smallest_raft_index(IncomingRaftIdx,
+ #?STATE{ra_indexes = Indexes,
+ release_cursors = Cursors0} = State0,
+ Effects) ->
+ case rabbit_fifo_index:size(Indexes) of
+ 0 ->
+ % there are no messages on queue anymore and no pending enqueues
+ % we can forward release_cursor all the way until
+ % the last received command, hooray
+ State = State0#?STATE{release_cursors = lqueue:new()},
+ {State, ok, Effects ++ [{release_cursor, IncomingRaftIdx, State}]};
+ _ ->
+ Smallest = rabbit_fifo_index:smallest(Indexes),
+ case find_next_cursor(Smallest, Cursors0) of
+ {empty, Cursors} ->
+ {State0#?STATE{release_cursors = Cursors},
+ ok, Effects};
+ {Cursor, Cursors} ->
+ %% we can emit a release cursor as we've passed the
+ %% smallest release cursor available.
+ {State0#?STATE{release_cursors = Cursors}, ok,
+ Effects ++ [Cursor]}
+ end
+ end.
+
+%% Pop cursors with an index strictly below Idx, returning the last
+%% (largest) one popped, or `empty' when none qualify.
+find_next_cursor(Idx, Cursors) ->
+ find_next_cursor(Idx, Cursors, empty).
+
+find_next_cursor(Smallest, Cursors0, Potential) ->
+ case lqueue:out(Cursors0) of
+ {{value, {_, Idx, _} = Cursor}, Cursors} when Idx < Smallest ->
+ %% we found one but it may not be the largest one
+ find_next_cursor(Smallest, Cursors, Cursor);
+ _ ->
+ {Potential, Cursors0}
+ end.
+
+%% Apply UpdateFun to Key in a message header, inserting Default when
+%% the key is absent. A bare integer header is the compact size-only
+%% form and is first expanded into its map representation.
+update_header(Key, UpdateFun, Default, Size)
+  when is_integer(Size) ->
+    update_header(Key, UpdateFun, Default, #{size => Size});
+update_header(Key, UpdateFun, Default, Header) ->
+    maps:update_with(Key, UpdateFun, Default, Header).
+
+
+%% Requeues (or dead-letters) a single checked-out message for ConsumerId.
+%% The first clause handles the dehydrated '$prefix_msg'/'$empty_msg'
+%% placeholders (always carried with MsgNum 0); the second handles regular
+%% raft-indexed messages. Both clauses bump the header's delivery_count
+%% first and, when it exceeds the configured delivery_limit, complete the
+%% message instead of returning it (regular messages also produce
+%% dead-letter effects).
+return_one(MsgId, 0, {Tag, Header0},
+ #?STATE{returns = Returns,
+ consumers = Consumers,
+ cfg = #cfg{delivery_limit = DeliveryLimit}} = State0,
+ Effects0, ConsumerId)
+ when Tag == '$prefix_msg'; Tag == '$empty_msg' ->
+ #consumer{checked_out = Checked} = Con0 = maps:get(ConsumerId, Consumers),
+ Header = update_header(delivery_count, fun (C) -> C+1 end, 1, Header0),
+ Msg0 = {Tag, Header},
+ case maps:get(delivery_count, Header) of
+ DeliveryCount when DeliveryCount > DeliveryLimit ->
+ complete(ConsumerId, #{MsgId => Msg0}, Con0, Effects0, State0);
+ _ ->
+ %% this should not affect the release cursor in any way
+ Con = Con0#consumer{checked_out = maps:remove(MsgId, Checked)},
+ %% if the in-memory limit would be exceeded, drop the body
+ %% (turn into '$empty_msg'), otherwise count it back in memory
+ {Msg, State1} = case Tag of
+ '$empty_msg' ->
+ {Msg0, State0};
+ _ -> case evaluate_memory_limit(Header, State0) of
+ true ->
+ {{'$empty_msg', Header}, State0};
+ false ->
+ {Msg0, add_in_memory_counts(Header, State0)}
+ end
+ end,
+ {add_bytes_return(
+ Header,
+ State1#?STATE{consumers = Consumers#{ConsumerId => Con},
+ returns = lqueue:in(Msg, Returns)}),
+ Effects0}
+ end;
+return_one(MsgId, MsgNum, {RaftId, {Header0, RawMsg}},
+ #?STATE{returns = Returns,
+ consumers = Consumers,
+ cfg = #cfg{delivery_limit = DeliveryLimit}} = State0,
+ Effects0, ConsumerId) ->
+ #consumer{checked_out = Checked} = Con0 = maps:get(ConsumerId, Consumers),
+ Header = update_header(delivery_count, fun (C) -> C+1 end, 1, Header0),
+ Msg0 = {RaftId, {Header, RawMsg}},
+ case maps:get(delivery_count, Header) of
+ DeliveryCount when DeliveryCount > DeliveryLimit ->
+ DlMsg = {MsgNum, Msg0},
+ Effects = dead_letter_effects(delivery_limit, #{none => DlMsg},
+ State0, Effects0),
+ complete(ConsumerId, #{MsgId => DlMsg}, Con0, Effects, State0);
+ _ ->
+ Con = Con0#consumer{checked_out = maps:remove(MsgId, Checked)},
+ %% this should not affect the release cursor in any way
+ %% evict the body to 'empty' when over the in-memory limit
+ {Msg, State1} = case RawMsg of
+ 'empty' ->
+ {Msg0, State0};
+ _ ->
+ case evaluate_memory_limit(Header, State0) of
+ true ->
+ {{RaftId, {Header, 'empty'}}, State0};
+ false ->
+ {Msg0, add_in_memory_counts(Header, State0)}
+ end
+ end,
+ {add_bytes_return(
+ Header,
+ State1#?STATE{consumers = Consumers#{ConsumerId => Con},
+ returns = lqueue:in({MsgNum, Msg}, Returns)}),
+ Effects0}
+ end.
+
+%% Returns every message currently checked out by Con back to the queue,
+%% in msg-id order (i.e. the order they were checked out in).
+return_all(#?STATE{consumers = Cons} = State0, Effects0, ConsumerId,
+ #consumer{checked_out = Checked0} = Con) ->
+ %% need to sort the list so that we return messages in the order
+ %% they were checked out
+ Checked = lists:sort(maps:to_list(Checked0)),
+ State = State0#?STATE{consumers = Cons#{ConsumerId => Con}},
+ lists:foldl(fun ({MsgId, {'$prefix_msg', _} = Msg}, {S, E}) ->
+ return_one(MsgId, 0, Msg, S, E, ConsumerId);
+ ({MsgId, {'$empty_msg', _} = Msg}, {S, E}) ->
+ return_one(MsgId, 0, Msg, S, E, ConsumerId);
+ ({MsgId, {MsgNum, Msg}}, {S, E}) ->
+ return_one(MsgId, MsgNum, Msg, S, E, ConsumerId)
+ end, {State, Effects0}, Checked).
+
+%% checkout new messages to consumers
+%% Meta carries the raft index of the current command; when the subsequent
+%% length/bytes limit evaluation dropped messages (second element true) the
+%% smallest live raft index is re-evaluated so a release cursor may be
+%% emitted.
+checkout(#{index := Index}, State0, Effects0) ->
+ {State1, _Result, Effects1} = checkout0(checkout_one(State0),
+ Effects0, {#{}, #{}}),
+ case evaluate_limit(false, State1, Effects1) of
+ {State, true, Effects} ->
+ update_smallest_raft_index(Index, State, Effects);
+ {State, false, Effects} ->
+ {State, ok, Effects}
+ end.
+
+%% Drives checkout_one/1 to a fixpoint, accumulating per-consumer
+%% deliveries: messages whose body was evicted from memory ('empty') are
+%% collected in LogAcc and delivered via raft-log read effects, in-memory
+%% messages in SendAcc via direct send_msg effects. The accumulated
+%% effects list is reversed before being returned.
+checkout0({success, ConsumerId, MsgId, {RaftIdx, {Header, 'empty'}}, State},
+ Effects, {SendAcc, LogAcc0}) ->
+ DelMsg = {RaftIdx, {MsgId, Header}},
+ LogAcc = maps:update_with(ConsumerId,
+ fun (M) -> [DelMsg | M] end,
+ [DelMsg], LogAcc0),
+ checkout0(checkout_one(State), Effects, {SendAcc, LogAcc});
+checkout0({success, ConsumerId, MsgId, Msg, State}, Effects,
+ {SendAcc0, LogAcc}) ->
+ DelMsg = {MsgId, Msg},
+ SendAcc = maps:update_with(ConsumerId,
+ fun (M) -> [DelMsg | M] end,
+ [DelMsg], SendAcc0),
+ checkout0(checkout_one(State), Effects, {SendAcc, LogAcc});
+checkout0({Activity, State0}, Effects0, {SendAcc, LogAcc}) ->
+ %% no more messages could be checked out; flush the accumulators
+ Effects1 = case Activity of
+ nochange ->
+ append_send_msg_effects(
+ append_log_effects(Effects0, LogAcc), SendAcc);
+ inactive ->
+ [{aux, inactive}
+ | append_send_msg_effects(
+ append_log_effects(Effects0, LogAcc), SendAcc)]
+ end,
+ {State0, ok, lists:reverse(Effects1)}.
+
+%% Drops messages from the head of the queue while the configured
+%% max_length/max_bytes limits are exceeded. Result accumulates whether
+%% anything was dropped (used by checkout/3 to decide if a release cursor
+%% re-evaluation is needed).
+evaluate_limit(Result,
+ #?STATE{cfg = #cfg{max_length = undefined,
+ max_bytes = undefined}} = State,
+ Effects) ->
+ {State, Result, Effects};
+evaluate_limit(Result, State00, Effects0) ->
+ State0 = convert_prefix_msgs(State00),
+ case is_over_limit(State0) of
+ true ->
+ {State, Effects} = drop_head(State0, Effects0),
+ evaluate_limit(true, State, Effects);
+ false ->
+ {State0, Result, Effects0}
+ end.
+
+%% True when keeping another message of the given size in memory would
+%% exceed the configured in-memory limits; callers then evict the message
+%% body (replace it with 'empty'). Accepts either a header map or a bare
+%% integer size.
+evaluate_memory_limit(_Header,
+ #?STATE{cfg = #cfg{max_in_memory_length = undefined,
+ max_in_memory_bytes = undefined}}) ->
+ false;
+evaluate_memory_limit(#{size := Size}, State) ->
+ evaluate_memory_limit(Size, State);
+evaluate_memory_limit(Size,
+ #?STATE{cfg = #cfg{max_in_memory_length = MaxLength,
+ max_in_memory_bytes = MaxBytes},
+ msg_bytes_in_memory = Bytes,
+ msgs_ready_in_memory = Length})
+ when is_integer(Size) ->
+ (Length >= MaxLength) orelse ((Bytes + Size) > MaxBytes).
+
+%% Prepends one send_msg effect per consumer in AccMap; when any were
+%% added, an {aux, active} marker is placed at the head. An empty AccMap
+%% leaves the effects untouched.
+append_send_msg_effects(Effects, AccMap) when map_size(AccMap) == 0 ->
+ Effects;
+append_send_msg_effects(Effects0, AccMap) ->
+ Effects = maps:fold(fun (C, Msgs, Ef) ->
+ [send_msg_effect(C, lists:reverse(Msgs)) | Ef]
+ end, Effects0, AccMap),
+ [{aux, active} | Effects].
+
+%% Prepends one raft-log read effect per consumer in AccMap, used for
+%% delivering messages whose bodies are no longer held in memory.
+append_log_effects(Effects0, AccMap) ->
+ maps:fold(fun (C, Msgs, Ef) ->
+ [send_log_effect(C, lists:reverse(Msgs)) | Ef]
+ end, Effects0, AccMap).
+
+%% next message is determined as follows:
+%% First we check if there are prefix returns
+%% Then we check if there are current returns
+%% then we check prefix msgs
+%% then we check current messages
+%%
+%% When we return it is always done to the current return queue
+%% for both prefix messages and current messages
+take_next_msg(#?STATE{prefix_msgs = {R, P}} = State) ->
+ %% conversion
+ take_next_msg(State#?STATE{prefix_msgs = {length(R), R, length(P), P}});
+take_next_msg(#?STATE{prefix_msgs = {NumR, [{'$empty_msg', _} = Msg | Rem],
+ NumP, P}} = State) ->
+ %% there are prefix returns, these should be served first
+ {Msg, State#?STATE{prefix_msgs = {NumR-1, Rem, NumP, P}}};
+take_next_msg(#?STATE{prefix_msgs = {NumR, [Header | Rem], NumP, P}} = State) ->
+ %% there are prefix returns, these should be served first
+ {{'$prefix_msg', Header},
+ State#?STATE{prefix_msgs = {NumR-1, Rem, NumP, P}}};
+take_next_msg(#?STATE{returns = Returns,
+ low_msg_num = Low0,
+ messages = Messages0,
+ prefix_msgs = {NumR, R, NumP, P}} = State) ->
+ %% use peek rather than out there as the most likely case is an empty
+ %% queue
+ case lqueue:peek(Returns) of
+ {value, NextMsg} ->
+ {NextMsg,
+ State#?STATE{returns = lqueue:drop(Returns)}};
+ empty when P == [] ->
+ %% no prefix messages pending: serve the regular messages map,
+ %% starting at the lowest live message number
+ case Low0 of
+ undefined ->
+ empty;
+ _ ->
+ {Msg, Messages} = maps:take(Low0, Messages0),
+ case maps:size(Messages) of
+ 0 ->
+ {{Low0, Msg},
+ State#?STATE{messages = Messages,
+ low_msg_num = undefined}};
+ _ ->
+ {{Low0, Msg},
+ State#?STATE{messages = Messages,
+ low_msg_num = Low0 + 1}}
+ end
+ end;
+ empty ->
+ [Msg | Rem] = P,
+ case Msg of
+ {Header, 'empty'} ->
+ %% There are prefix msgs
+ {{'$empty_msg', Header},
+ State#?STATE{prefix_msgs = {NumR, R, NumP-1, Rem}}};
+ Header ->
+ {{'$prefix_msg', Header},
+ State#?STATE{prefix_msgs = {NumR, R, NumP-1, Rem}}}
+ end
+ end.
+
+%% Builds a ra effect that delivers in-memory messages directly to the
+%% consumer process.
+send_msg_effect({CTag, CPid}, Msgs) ->
+ {send_msg, CPid, {delivery, CTag, Msgs}, [local, ra_event]}.
+
+%% Builds a ra log-read effect: the raw message bodies are fetched from
+%% the raft log entries at RaftIdxs and re-joined with their msg ids and
+%% headers before delivery on the consumer's local node.
+send_log_effect({CTag, CPid}, IdxMsgs) ->
+ {RaftIdxs, Data} = lists:unzip(IdxMsgs),
+ {log, RaftIdxs,
+ fun(Log) ->
+ Msgs = lists:zipwith(fun ({enqueue, _, _, Msg}, {MsgId, Header}) ->
+ {MsgId, {Header, Msg}}
+ end, Log, Data),
+ [{send_msg, CPid, {delivery, CTag, Msgs}, [local, ra_event]}]
+ end,
+ {local, node(CPid)}}.
+
+%% Builds a ra log-read effect that replies to a dequeue call (From) with
+%% the message body read back from the raft log entry at RaftIdx.
+reply_log_effect(RaftIdx, MsgId, Header, Ready, From) ->
+ {log, [RaftIdx],
+ fun([{enqueue, _, _, Msg}]) ->
+ [{reply, From, {wrap_reply,
+ {dequeue, {MsgId, {Header, Msg}}, Ready}}}]
+ end}.
+
+%% Attempts to check out a single message to the consumer at the head of
+%% the service queue. Consumers without credit, cancelled or suspected
+%% down are skipped (dropped from the service queue and recursed).
+%% Returns {success, ConsumerId, MsgId, Msg, State} on delivery,
+%% {nochange, State} when there is nothing to do, or {inactive, State}
+%% when messages are ready but no consumer can be serviced.
+checkout_one(#?STATE{service_queue = SQ0,
+ messages = Messages0,
+ consumers = Cons0} = InitState) ->
+ case queue:peek(SQ0) of
+ {value, ConsumerId} ->
+ case take_next_msg(InitState) of
+ {ConsumerMsg, State0} ->
+ SQ1 = queue:drop(SQ0),
+ %% there are consumers waiting to be serviced
+ %% process consumer checkout
+ case maps:find(ConsumerId, Cons0) of
+ {ok, #consumer{credit = 0}} ->
+ %% no credit but was still on queue
+ %% can happen when draining
+ %% recurse without consumer on queue
+ checkout_one(InitState#?STATE{service_queue = SQ1});
+ {ok, #consumer{status = cancelled}} ->
+ checkout_one(InitState#?STATE{service_queue = SQ1});
+ {ok, #consumer{status = suspected_down}} ->
+ checkout_one(InitState#?STATE{service_queue = SQ1});
+ {ok, #consumer{checked_out = Checked0,
+ next_msg_id = Next,
+ credit = Credit,
+ delivery_count = DelCnt} = Con0} ->
+ Checked = maps:put(Next, ConsumerMsg, Checked0),
+ Con = Con0#consumer{checked_out = Checked,
+ next_msg_id = Next + 1,
+ credit = Credit - 1,
+ delivery_count = DelCnt + 1},
+ {Cons, SQ, []} = % we expect no effects
+ update_or_remove_sub(ConsumerId, Con,
+ Cons0, SQ1, []),
+ State1 = State0#?STATE{service_queue = SQ,
+ consumers = Cons},
+ %% adjust byte/in-memory accounting depending on
+ %% the message form being checked out
+ {State, Msg} =
+ case ConsumerMsg of
+ {'$prefix_msg', Header} ->
+ {subtract_in_memory_counts(
+ Header, add_bytes_checkout(Header, State1)),
+ ConsumerMsg};
+ {'$empty_msg', Header} ->
+ {add_bytes_checkout(Header, State1),
+ ConsumerMsg};
+ {_, {_, {Header, 'empty'}} = M} ->
+ {add_bytes_checkout(Header, State1),
+ M};
+ {_, {_, {Header, _} = M}} ->
+ {subtract_in_memory_counts(
+ Header,
+ add_bytes_checkout(Header, State1)),
+ M}
+ end,
+ {success, ConsumerId, Next, Msg, State};
+ error ->
+ %% consumer did not exist but was queued, recurse
+ checkout_one(InitState#?STATE{service_queue = SQ1})
+ end;
+ empty ->
+ {nochange, InitState}
+ end;
+ empty ->
+ case maps:size(Messages0) of
+ 0 -> {nochange, InitState};
+ _ -> {inactive, InitState}
+ end
+ end.
+
+%% Stores the updated consumer and decides whether it should (re)join the
+%% service queue: consumers with remaining credit are re-queued; 'once'
+%% consumers with no credit and nothing checked out are removed entirely.
+update_or_remove_sub(ConsumerId, #consumer{lifetime = auto,
+ credit = 0} = Con,
+ Cons, ServiceQueue, Effects) ->
+ {maps:put(ConsumerId, Con, Cons), ServiceQueue, Effects};
+update_or_remove_sub(ConsumerId, #consumer{lifetime = auto} = Con,
+ Cons, ServiceQueue, Effects) ->
+ {maps:put(ConsumerId, Con, Cons),
+ uniq_queue_in(ConsumerId, ServiceQueue), Effects};
+update_or_remove_sub(ConsumerId, #consumer{lifetime = once,
+ checked_out = Checked,
+ credit = 0} = Con,
+ Cons, ServiceQueue, Effects) ->
+ case maps:size(Checked) of
+ 0 ->
+ % we're done with this consumer
+ % TODO: demonitor consumer pid but _only_ if there are no other
+ % monitors for this pid
+ {maps:remove(ConsumerId, Cons), ServiceQueue, Effects};
+ _ ->
+ % there are unsettled items so need to keep around
+ {maps:put(ConsumerId, Con, Cons), ServiceQueue, Effects}
+ end;
+update_or_remove_sub(ConsumerId, #consumer{lifetime = once} = Con,
+ Cons, ServiceQueue, Effects) ->
+ {maps:put(ConsumerId, Con, Cons),
+ uniq_queue_in(ConsumerId, ServiceQueue), Effects}.
+
+%% Enqueues Key onto Queue unless it is already a member, preserving the
+%% existing position of any duplicate.
+uniq_queue_in(Key, Queue) ->
+    %% TODO: queue:member could surely be quite expensive, however the
+    %% practical number of unique consumers may not be large enough for
+    %% it to matter
+    case queue:member(Key, Queue) of
+        false ->
+            queue:in(Key, Queue);
+        true ->
+            Queue
+    end.
+
+%% Adds or updates a consumer according to the queue's consumer strategy.
+%% With single_active, only the first consumer becomes active; any further
+%% consumers are appended to the waiting list instead.
+update_consumer(ConsumerId, Meta, Spec,
+ #?STATE{cfg = #cfg{consumer_strategy = competing}} = State0) ->
+ %% general case, single active consumer off
+ update_consumer0(ConsumerId, Meta, Spec, State0);
+update_consumer(ConsumerId, Meta, Spec,
+ #?STATE{consumers = Cons0,
+ cfg = #cfg{consumer_strategy = single_active}} = State0)
+ when map_size(Cons0) == 0 ->
+ %% single active consumer on, no one is consuming yet
+ update_consumer0(ConsumerId, Meta, Spec, State0);
+update_consumer(ConsumerId, Meta, {Life, Credit, Mode},
+ #?STATE{cfg = #cfg{consumer_strategy = single_active},
+ waiting_consumers = WaitingConsumers0} = State0) ->
+ %% single active consumer on and one active consumer already
+ %% adding the new consumer to the waiting list
+ Consumer = #consumer{lifetime = Life, meta = Meta,
+ credit = Credit, credit_mode = Mode},
+ WaitingConsumers1 = WaitingConsumers0 ++ [{ConsumerId, Consumer}],
+ State0#?STATE{waiting_consumers = WaitingConsumers1}.
+
+%% Creates a new consumer or merges the checkout spec into an existing
+%% one (reducing the requested credit by the number of in-flight
+%% messages), then queues it for service if it has credit.
+update_consumer0(ConsumerId, Meta, {Life, Credit, Mode},
+ #?STATE{consumers = Cons0,
+ service_queue = ServiceQueue0} = State0) ->
+ %% TODO: this logic may not be correct for updating a pre-existing consumer
+ Init = #consumer{lifetime = Life, meta = Meta,
+ credit = Credit, credit_mode = Mode},
+ Cons = maps:update_with(ConsumerId,
+ fun(S) ->
+ %% remove any in-flight messages from
+ %% the credit update
+ N = maps:size(S#consumer.checked_out),
+ C = max(0, Credit - N),
+ S#consumer{lifetime = Life, credit = C}
+ end, Init, Cons0),
+ ServiceQueue = maybe_queue_consumer(ConsumerId, maps:get(ConsumerId, Cons),
+ ServiceQueue0),
+ State0#?STATE{consumers = Cons, service_queue = ServiceQueue}.
+
+%% Adds the consumer to the service queue when it has credit available,
+%% avoiding duplicate entries.
+maybe_queue_consumer(ConsumerId, #consumer{credit = Credit},
+ ServiceQueue0) ->
+ case Credit > 0 of
+ true ->
+ % consumer needs service - check if already on service queue
+ uniq_queue_in(ConsumerId, ServiceQueue0);
+ false ->
+ ServiceQueue0
+ end.
+
+%% Upgrades the legacy 2-tuple prefix_msgs format {Returns, Msgs} to the
+%% counted 4-tuple {NumR, Returns, NumP, Msgs}; a no-op otherwise.
+convert_prefix_msgs(#?STATE{prefix_msgs = {R, P}} = State) ->
+ State#?STATE{prefix_msgs = {length(R), R, length(P), P}};
+convert_prefix_msgs(State) ->
+ State.
+
+%% creates a dehydrated version of the current state to be cached and
+%% potentially used for a snapshot at a later point
+%% All message payloads are replaced with their headers/sizes (moved into
+%% prefix_msgs) so the cached copy stays small while preserving
+%% deterministic consumer ordering on recovery.
+dehydrate_state(#?STATE{messages = Messages,
+ consumers = Consumers,
+ returns = Returns,
+ low_msg_num = Low,
+ next_msg_num = Next,
+ prefix_msgs = {PRCnt, PrefRet0, PPCnt, PrefMsg0},
+ waiting_consumers = Waiting0} = State) ->
+ RCnt = lqueue:len(Returns),
+ %% TODO: optimise this function as far as possible
+ PrefRet1 = lists:foldr(fun ({'$prefix_msg', Header}, Acc) ->
+ [Header | Acc];
+ ({'$empty_msg', _} = Msg, Acc) ->
+ [Msg | Acc];
+ ({_, {_, {Header, 'empty'}}}, Acc) ->
+ [{'$empty_msg', Header} | Acc];
+ ({_, {_, {Header, _}}}, Acc) ->
+ [Header | Acc]
+ end,
+ [],
+ lqueue:to_list(Returns)),
+ PrefRet = PrefRet0 ++ PrefRet1,
+ PrefMsgsSuff = dehydrate_messages(Low, Next - 1, Messages, []),
+ %% prefix messages are not populated in normal operation only after
+ %% recovering from a snapshot
+ PrefMsgs = PrefMsg0 ++ PrefMsgsSuff,
+ Waiting = [{Cid, dehydrate_consumer(C)} || {Cid, C} <- Waiting0],
+ State#?STATE{messages = #{},
+ ra_indexes = rabbit_fifo_index:empty(),
+ release_cursors = lqueue:new(),
+ low_msg_num = undefined,
+ consumers = maps:map(fun (_, C) ->
+ dehydrate_consumer(C)
+ end, Consumers),
+ returns = lqueue:new(),
+ prefix_msgs = {PRCnt + RCnt, PrefRet,
+ PPCnt + maps:size(Messages), PrefMsgs},
+ waiting_consumers = Waiting}.
+
+%% Walks message numbers from Next down to Low, replacing each message
+%% with its header ('$empty_msg'-tagged when the body was already
+%% evicted); accumulates in ascending message-number order.
+dehydrate_messages(Low, Next, _Msgs, Acc)
+ when Next < Low ->
+ Acc;
+dehydrate_messages(Low, Next, Msgs, Acc0) ->
+ Acc = case maps:get(Next, Msgs) of
+ {_RaftIdx, {_, 'empty'} = Msg} ->
+ [Msg | Acc0];
+ {_RaftIdx, {Header, _}} ->
+ [Header | Acc0]
+ end,
+ dehydrate_messages(Low, Next - 1, Msgs, Acc).
+
+%% Replaces each checked-out indexed message with a prefix placeholder
+%% ('$prefix_msg' for in-memory bodies, '$empty_msg' for evicted ones).
+dehydrate_consumer(#consumer{checked_out = Checked0} = Con) ->
+ Checked = maps:map(fun (_, {'$prefix_msg', _} = M) ->
+ M;
+ (_, {'$empty_msg', _} = M) ->
+ M;
+ (_, {_, {_, {Header, 'empty'}}}) ->
+ {'$empty_msg', Header};
+ (_, {_, {_, {Header, _}}}) ->
+ {'$prefix_msg', Header}
+ end, Checked0),
+ Con#consumer{checked_out = Checked}.
+
+%% make the state suitable for equality comparison
+%% (rebuilds the release-cursor lqueue so structurally different but
+%% equivalent queues compare equal)
+normalize(#?STATE{release_cursors = Cursors} = State) ->
+ State#?STATE{release_cursors = lqueue:from_list(lqueue:to_list(Cursors))}.
+
+%% True when the number of ready messages or their total byte size
+%% exceeds the configured max_length/max_bytes limits.
+is_over_limit(#?STATE{cfg = #cfg{max_length = undefined,
+ max_bytes = undefined}}) ->
+ false;
+is_over_limit(#?STATE{cfg = #cfg{max_length = MaxLength,
+ max_bytes = MaxBytes},
+ msg_bytes_enqueue = BytesEnq} = State) ->
+
+ messages_ready(State) > MaxLength orelse (BytesEnq > MaxBytes).
+
+%% Prepares a v0 state for conversion to the v1 module: normalises
+%% release_cursor_interval to the {Base, Current} tuple form and upgrades
+%% the prefix_msgs format, so v1 does not need any v0-specific code.
+normalize_for_v1(#?STATE{cfg = Cfg} = State) ->
+ %% run all v0 conversions so that v1 does not have to have this code
+ RCI = case Cfg of
+ #cfg{release_cursor_interval = {_, _} = R} ->
+ R;
+ #cfg{release_cursor_interval = undefined} ->
+ {?RELEASE_CURSOR_EVERY, ?RELEASE_CURSOR_EVERY};
+ #cfg{release_cursor_interval = C} ->
+ {?RELEASE_CURSOR_EVERY, C}
+ end,
+ convert_prefix_msgs(
+ State#?STATE{cfg = Cfg#cfg{release_cursor_interval = RCI}}).
+
+%% Dynamic record-field access: returns the value of the named field of
+%% the state record, exiting if the field does not exist.
+get_field(Field, State) ->
+ Fields = record_info(fields, ?STATE),
+ Index = record_index_of(Field, Fields),
+ element(Index, State).
+
+%% Dynamic record-field access for the embedded #cfg{} record.
+get_cfg_field(Field, #?STATE{cfg = Cfg} ) ->
+ Fields = record_info(fields, cfg),
+ Index = record_index_of(Field, Fields),
+ element(Index, Cfg).
+
+%% Field names start at element 2 of a record tuple (element 1 is the tag).
+record_index_of(F, Fields) ->
+ index_of(2, F, Fields).
+
+%% Returns the position of Field in the list, counting from the starting
+%% index N; exits with {field_not_found, Field} when it is absent.
+index_of(_N, Field, []) ->
+    exit({field_not_found, Field});
+index_of(N, Field, [Field | _Rest]) ->
+    N;
+index_of(N, Field, [_Other | Rest]) ->
+    index_of(N + 1, Field, Rest).
+
+%% ---------------------------------------------------------------------------
+%% Constructors for the protocol command records applied to this state
+%% machine.
+%% ---------------------------------------------------------------------------
+-spec make_enqueue(option(pid()), option(msg_seqno()), raw_msg()) -> protocol().
+make_enqueue(Pid, Seq, Msg) ->
+ #enqueue{pid = Pid, seq = Seq, msg = Msg}.
+-spec make_checkout(consumer_id(),
+ checkout_spec(), consumer_meta()) -> protocol().
+make_checkout(ConsumerId, Spec, Meta) ->
+ #checkout{consumer_id = ConsumerId,
+ spec = Spec, meta = Meta}.
+
+-spec make_settle(consumer_id(), [msg_id()]) -> protocol().
+make_settle(ConsumerId, MsgIds) ->
+ #settle{consumer_id = ConsumerId, msg_ids = MsgIds}.
+
+-spec make_return(consumer_id(), [msg_id()]) -> protocol().
+make_return(ConsumerId, MsgIds) ->
+ #return{consumer_id = ConsumerId, msg_ids = MsgIds}.
+
+-spec make_discard(consumer_id(), [msg_id()]) -> protocol().
+make_discard(ConsumerId, MsgIds) ->
+ #discard{consumer_id = ConsumerId, msg_ids = MsgIds}.
+
+-spec make_credit(consumer_id(), non_neg_integer(), non_neg_integer(),
+ boolean()) -> protocol().
+make_credit(ConsumerId, Credit, DeliveryCount, Drain) ->
+ #credit{consumer_id = ConsumerId,
+ credit = Credit,
+ delivery_count = DeliveryCount,
+ drain = Drain}.
+
+-spec make_purge() -> protocol().
+make_purge() -> #purge{}.
+
+-spec make_purge_nodes([node()]) -> protocol().
+make_purge_nodes(Nodes) ->
+ #purge_nodes{nodes = Nodes}.
+
+-spec make_update_config(config()) -> protocol().
+make_update_config(Config) ->
+ #update_config{config = Config}.
+
+%% ---------------------------------------------------------------------------
+%% Byte accounting helpers. msg_bytes_enqueue tracks the bytes of ready
+%% messages and msg_bytes_checkout those of checked-out messages; the
+%% *_in_memory counters track ready messages whose bodies are kept in
+%% memory. Each helper accepts either a raw integer size or a header map
+%% containing a size key.
+%% ---------------------------------------------------------------------------
+add_bytes_enqueue(Bytes,
+ #?STATE{msg_bytes_enqueue = Enqueue} = State)
+ when is_integer(Bytes) ->
+ State#?STATE{msg_bytes_enqueue = Enqueue + Bytes};
+add_bytes_enqueue(#{size := Bytes}, State) ->
+ add_bytes_enqueue(Bytes, State).
+
+add_bytes_drop(Bytes,
+ #?STATE{msg_bytes_enqueue = Enqueue} = State)
+ when is_integer(Bytes) ->
+ State#?STATE{msg_bytes_enqueue = Enqueue - Bytes};
+add_bytes_drop(#{size := Bytes}, State) ->
+ add_bytes_drop(Bytes, State).
+
+%% checkout moves bytes from the enqueue counter to the checkout counter
+add_bytes_checkout(Bytes,
+ #?STATE{msg_bytes_checkout = Checkout,
+ msg_bytes_enqueue = Enqueue } = State)
+ when is_integer(Bytes) ->
+ State#?STATE{msg_bytes_checkout = Checkout + Bytes,
+ msg_bytes_enqueue = Enqueue - Bytes};
+add_bytes_checkout(#{size := Bytes}, State) ->
+ add_bytes_checkout(Bytes, State).
+
+add_bytes_settle(Bytes,
+ #?STATE{msg_bytes_checkout = Checkout} = State)
+ when is_integer(Bytes) ->
+ State#?STATE{msg_bytes_checkout = Checkout - Bytes};
+add_bytes_settle(#{size := Bytes}, State) ->
+ add_bytes_settle(Bytes, State).
+
+%% return moves bytes back from the checkout counter to the enqueue counter
+add_bytes_return(Bytes,
+ #?STATE{msg_bytes_checkout = Checkout,
+ msg_bytes_enqueue = Enqueue} = State)
+ when is_integer(Bytes) ->
+ State#?STATE{msg_bytes_checkout = Checkout - Bytes,
+ msg_bytes_enqueue = Enqueue + Bytes};
+add_bytes_return(#{size := Bytes}, State) ->
+ add_bytes_return(Bytes, State).
+
+add_in_memory_counts(Bytes,
+ #?STATE{msg_bytes_in_memory = InMemoryBytes,
+ msgs_ready_in_memory = InMemoryCount} = State)
+ when is_integer(Bytes) ->
+ State#?STATE{msg_bytes_in_memory = InMemoryBytes + Bytes,
+ msgs_ready_in_memory = InMemoryCount + 1};
+add_in_memory_counts(#{size := Bytes}, State) ->
+ add_in_memory_counts(Bytes, State).
+
+subtract_in_memory_counts(Bytes,
+ #?STATE{msg_bytes_in_memory = InMemoryBytes,
+ msgs_ready_in_memory = InMemoryCount} = State)
+ when is_integer(Bytes) ->
+ State#?STATE{msg_bytes_in_memory = InMemoryBytes - Bytes,
+ msgs_ready_in_memory = InMemoryCount - 1};
+subtract_in_memory_counts(#{size := Bytes}, State) ->
+ subtract_in_memory_counts(Bytes, State).
+
+%% Returns the payload size in bytes for the various message forms:
+%% AMQP basic messages, dehydrated prefix placeholders, raw binaries and
+%% (for tests) arbitrary terms.
+message_size(#basic_message{content = Content}) ->
+ #content{payload_fragments_rev = PFR} = Content,
+ iolist_size(PFR);
+message_size({'$prefix_msg', H}) ->
+ get_size_from_header(H);
+message_size({'$empty_msg', H}) ->
+ get_size_from_header(H);
+message_size(B) when is_binary(B) ->
+ byte_size(B);
+message_size(Msg) ->
+ %% probably only hit this for testing so ok to use erts_debug
+ erts_debug:size(Msg).
+
+%% Extracts the payload size from a message header, which is either the
+%% condensed bare-integer form or a map carrying a size key.
+get_size_from_header(#{size := Size}) ->
+    Size;
+get_size_from_header(Size) when is_integer(Size) ->
+    Size.
+
+
+%% Returns the distinct nodes hosting any consumer, enqueuer or waiting
+%% consumer process (deduplicated via a map used as a set).
+all_nodes(#?STATE{consumers = Cons0,
+ enqueuers = Enqs0,
+ waiting_consumers = WaitingConsumers0}) ->
+ Nodes0 = maps:fold(fun({_, P}, _, Acc) ->
+ Acc#{node(P) => ok}
+ end, #{}, Cons0),
+ Nodes1 = maps:fold(fun(P, _, Acc) ->
+ Acc#{node(P) => ok}
+ end, Nodes0, Enqs0),
+ maps:keys(
+ lists:foldl(fun({{_, P}, _}, Acc) ->
+ Acc#{node(P) => ok}
+ end, Nodes1, WaitingConsumers0)).
+
+%% Returns the pids of all consumers, enqueuers and waiting consumers
+%% that live on the given node (may contain duplicates).
+all_pids_for(Node, #?STATE{consumers = Cons0,
+ enqueuers = Enqs0,
+ waiting_consumers = WaitingConsumers0}) ->
+ Cons = maps:fold(fun({_, P}, _, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, _, Acc) -> Acc
+ end, [], Cons0),
+ Enqs = maps:fold(fun(P, _, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, _, Acc) -> Acc
+ end, Cons, Enqs0),
+ lists:foldl(fun({{_, P}, _}, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, Acc) -> Acc
+ end, Enqs, WaitingConsumers0).
+
+%% Like all_pids_for/2 but only returns pids whose consumer/enqueuer
+%% status is suspected_down (waiting consumers included).
+suspected_pids_for(Node, #?STATE{consumers = Cons0,
+ enqueuers = Enqs0,
+ waiting_consumers = WaitingConsumers0}) ->
+ Cons = maps:fold(fun({_, P}, #consumer{status = suspected_down}, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, _, Acc) -> Acc
+ end, [], Cons0),
+ Enqs = maps:fold(fun(P, #enqueuer{status = suspected_down}, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, _, Acc) -> Acc
+ end, Cons, Enqs0),
+ lists:foldl(fun({{_, P},
+ #consumer{status = suspected_down}}, Acc)
+ when node(P) =:= Node ->
+ [P | Acc];
+ (_, Acc) -> Acc
+ end, Enqs, WaitingConsumers0).
diff --git a/deps/rabbit/src/rabbit_fifo_v0.hrl b/deps/rabbit/src/rabbit_fifo_v0.hrl
new file mode 100644
index 0000000000..333ccb4d77
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fifo_v0.hrl
@@ -0,0 +1,195 @@
+
+-type option(T) :: undefined | T.
+
+-type raw_msg() :: term().
+%% The raw message. It is opaque to rabbit_fifo.
+
+-type msg_in_id() :: non_neg_integer().
+% a queue scoped monotonically incrementing integer used to enforce order
+% in the unassigned messages map
+
+-type msg_id() :: non_neg_integer().
+%% A consumer-scoped monotonically incrementing integer included with a
+%% {@link delivery/0.}. Used to settle deliveries using
+%% {@link rabbit_fifo_client:settle/3.}
+
+-type msg_seqno() :: non_neg_integer().
+%% A sender process scoped monotonically incrementing integer included
+%% in enqueue messages. Used to ensure ordering of messages sent from the
+%% same process
+
+-type msg_header() :: msg_size() |
+ #{size := msg_size(),
+ delivery_count => non_neg_integer()}.
+%% The message header:
+%% delivery_count: the number of unsuccessful delivery attempts.
+%% A non-zero value indicates a previous attempt.
+%% If it only contains the size it can be condensed to an integer only
+
+-type msg() :: {msg_header(), raw_msg()}.
+%% message with a header map.
+
+-type msg_size() :: non_neg_integer().
+%% the size in bytes of the msg payload
+
+-type indexed_msg() :: {ra:index(), msg()}.
+
+-type prefix_msg() :: {'$prefix_msg', msg_header()}.
+
+-type delivery_msg() :: {msg_id(), msg()}.
+%% A tuple consisting of the message id and the headered message.
+
+-type consumer_tag() :: binary().
+%% An arbitrary binary tag used to distinguish between different consumers
+%% set up by the same process. See: {@link rabbit_fifo_client:checkout/3.}
+
+-type delivery() :: {delivery, consumer_tag(), [delivery_msg()]}.
+%% Represents the delivery of one or more rabbit_fifo messages.
+
+-type consumer_id() :: {consumer_tag(), pid()}.
+%% The entity that receives messages. Uniquely identifies a consumer.
+
+-type credit_mode() :: simple_prefetch | credited.
+%% determines how credit is replenished
+
+-type checkout_spec() :: {once | auto, Num :: non_neg_integer(),
+ credit_mode()} |
+ {dequeue, settled | unsettled} |
+ cancel.
+
+-type consumer_meta() :: #{ack => boolean(),
+ username => binary(),
+ prefetch => non_neg_integer(),
+ args => list()}.
+%% static meta data associated with a consumer
+
+
+-type applied_mfa() :: {module(), atom(), list()}.
+% represents a partially applied module call
+
+-define(RELEASE_CURSOR_EVERY, 64000).
+-define(RELEASE_CURSOR_EVERY_MAX, 3200000).
+-define(USE_AVG_HALF_LIFE, 10000.0).
+%% an average QQ without any message uses about 100KB so setting this limit
+%% to ~10 times that should be relatively safe.
+-define(GC_MEM_LIMIT_B, 2000000).
+
+-define(MB, 1048576).
+-define(STATE, rabbit_fifo).
+
+%% Per-consumer state; several fields are part of the snapshot data.
+-record(consumer,
+        {meta = #{} :: consumer_meta(),
+         checked_out = #{} :: #{msg_id() => {msg_in_id(), indexed_msg()}},
+         next_msg_id = 0 :: msg_id(), % part of snapshot data
+         %% max number of messages that can be sent
+         %% decremented for each delivery
+         %% NOTE: fixed the type annotation — a single ':' here is not a
+         %% type annotation but remote-call syntax used as the default value
+         credit = 0 :: non_neg_integer(),
+         %% total number of checked out messages - ever
+         %% incremented for each delivery
+         delivery_count = 0 :: non_neg_integer(),
+         %% the mode of how credit is incremented
+         %% simple_prefetch: credit is re-filled as deliveries are settled
+         %% or returned.
+         %% credited: credit can only be changed by receiving a consumer_credit
+         %% command: `{consumer_credit, ReceiverDeliveryCount, Credit}'
+         credit_mode = simple_prefetch :: credit_mode(), % part of snapshot data
+         lifetime = once :: once | auto,
+         status = up :: up | suspected_down | cancelled
+        }).
+
+-type consumer() :: #consumer{}.
+
+-type consumer_strategy() :: competing | single_active.
+
+%% Per-enqueuer-process state, used to enforce ordering of enqueues
+%% arriving from the same process (see msg_seqno()).
+-record(enqueuer,
+ {next_seqno = 1 :: msg_seqno(),
+ % out of order enqueues - sorted list
+ pending = [] :: [{msg_seqno(), ra:index(), raw_msg()}],
+ status = up :: up | suspected_down
+ }).
+
+%% Static per-queue configuration held inside the machine state,
+%% initialised from the config() map.
+-record(cfg,
+ {name :: atom(),
+ resource :: rabbit_types:r('queue'),
+ %% undefined or bare values are normalised to the {Base, Current}
+ %% tuple form by normalize_for_v1/1
+ release_cursor_interval ::
+ undefined | non_neg_integer() |
+ {non_neg_integer(), non_neg_integer()},
+ dead_letter_handler :: option(applied_mfa()),
+ become_leader_handler :: option(applied_mfa()),
+ max_length :: option(non_neg_integer()),
+ max_bytes :: option(non_neg_integer()),
+ %% whether single active consumer is on or not for this queue
+ consumer_strategy = competing :: consumer_strategy(),
+ %% the maximum number of unsuccessful delivery attempts permitted
+ delivery_limit :: option(non_neg_integer()),
+ max_in_memory_length :: option(non_neg_integer()),
+ max_in_memory_bytes :: option(non_neg_integer())
+ }).
+
+-type prefix_msgs() :: {list(), list()} |
+ {non_neg_integer(), list(),
+ non_neg_integer(), list()}.
+
+-record(?STATE,
+ {cfg :: #cfg{},
+ % unassigned messages
+ messages = #{} :: #{msg_in_id() => indexed_msg()},
+ % defines the lowest message in id available in the messages map
+ % that isn't a return
+ low_msg_num :: option(msg_in_id()),
+ % defines the next message in id to be added to the messages map
+ next_msg_num = 1 :: msg_in_id(),
+ % list of returned msg_in_ids - when checking out it picks from
+ % this list first before taking low_msg_num
+ returns = lqueue:new() :: lqueue:lqueue(prefix_msg() |
+ {msg_in_id(), indexed_msg()}),
+ % a counter of enqueues - used to trigger shadow copy points
+ enqueue_count = 0 :: non_neg_integer(),
+ % a map containing all the live processes that have ever enqueued
+ % a message to this queue as well as a cached value of the smallest
+ % ra_index of all pending enqueues
+ enqueuers = #{} :: #{pid() => #enqueuer{}},
+ % master index of all enqueue raft indexes including pending
+ % enqueues
+ % rabbit_fifo_index can be slow when calculating the smallest
+ % index when there are large gaps but should be faster than gb_trees
+ % for normal appending operations as it's backed by a map
+ ra_indexes = rabbit_fifo_index:empty() :: rabbit_fifo_index:state(),
+ % cached release cursors awaiting emission (see find_next_cursor/3)
+ release_cursors = lqueue:new() :: lqueue:lqueue({release_cursor,
+ ra:index(), #?STATE{}}),
+ % consumers need to reflect consumer state at time of snapshot
+ % needs to be part of snapshot
+ consumers = #{} :: #{consumer_id() => #consumer{}},
+ % consumers that require further service are queued here
+ % needs to be part of snapshot
+ service_queue = queue:new() :: queue:queue(consumer_id()),
+ %% This is a special field that is only used for snapshots
+ %% It represents the queued messages at the time the
+ %% dehydrated snapshot state was cached.
+ %% As release_cursors are only emitted for raft indexes where all
+ %% prior messages no longer contribute to the current state we can
+ %% replace all message payloads with their sizes (to be used for
+ %% overflow calculations).
+ %% This is done so that consumers are still served in a deterministic
+ %% order on recovery.
+ prefix_msgs = {0, [], 0, []} :: prefix_msgs(),
+ % total bytes of messages currently ready for delivery
+ msg_bytes_enqueue = 0 :: non_neg_integer(),
+ % total bytes of messages currently checked out to consumers
+ msg_bytes_checkout = 0 :: non_neg_integer(),
+ %% waiting consumers; one is picked when the active consumer is
+ %% cancelled or dies
+ %% used only when single active consumer is on
+ waiting_consumers = [] :: [{consumer_id(), consumer()}],
+ % bytes/count of ready messages whose bodies are kept in memory
+ msg_bytes_in_memory = 0 :: non_neg_integer(),
+ msgs_ready_in_memory = 0 :: non_neg_integer()
+ }).
+
+-type config() :: #{name := atom(),
+ queue_resource := rabbit_types:r('queue'),
+ dead_letter_handler => applied_mfa(),
+ become_leader_handler => applied_mfa(),
+ release_cursor_interval => non_neg_integer(),
+ max_length => non_neg_integer(),
+ max_bytes => non_neg_integer(),
+ max_in_memory_length => non_neg_integer(),
+ max_in_memory_bytes => non_neg_integer(),
+ single_active_consumer_on => boolean(),
+ delivery_limit => non_neg_integer()}.
diff --git a/deps/rabbit/src/rabbit_file.erl b/deps/rabbit/src/rabbit_file.erl
new file mode 100644
index 0000000000..f8263d9e77
--- /dev/null
+++ b/deps/rabbit/src/rabbit_file.erl
@@ -0,0 +1,321 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2011-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_file).
+
+-include_lib("kernel/include/file.hrl").
+
+-export([is_file/1, is_dir/1, file_size/1, ensure_dir/1, wildcard/2, list_dir/1]).
+-export([read_term_file/1, write_term_file/2, write_file/2, write_file/3]).
+-export([append_file/2, ensure_parent_dirs_exist/1]).
+-export([rename/2, delete/1, recursive_delete/1, recursive_copy/2]).
+-export([lock_file/1]).
+-export([read_file_info/1]).
+-export([filename_as_a_directory/1]).
+
+-import(file_handle_cache, [with_handle/1, with_handle/2]).
+
+-define(TMP_EXT, ".tmp").
+
+%%----------------------------------------------------------------------------
+
+-type ok_or_error() :: rabbit_types:ok_or_error(any()).
+
+%%----------------------------------------------------------------------------
+
+-spec is_file((file:filename())) -> boolean().
+
+is_file(File) ->
+ case read_file_info(File) of
+ {ok, #file_info{type=regular}} -> true;
+ {ok, #file_info{type=directory}} -> true;
+ _ -> false
+ end.
+
+-spec is_dir((file:filename())) -> boolean().
+
+is_dir(Dir) -> is_dir_internal(read_file_info(Dir)).
+
+is_dir_no_handle(Dir) -> is_dir_internal(prim_file:read_file_info(Dir)).
+
+is_dir_internal({ok, #file_info{type=directory}}) -> true;
+is_dir_internal(_) -> false.
+
+-spec file_size((file:filename())) -> non_neg_integer().
+
+file_size(File) ->
+ case read_file_info(File) of
+ {ok, #file_info{size=Size}} -> Size;
+ _ -> 0
+ end.
+
+-spec ensure_dir((file:filename())) -> ok_or_error().
+
+ensure_dir(File) -> with_handle(fun () -> ensure_dir_internal(File) end).
+
+ensure_dir_internal("/") ->
+ ok;
+ensure_dir_internal(File) ->
+ Dir = filename:dirname(File),
+ case is_dir_no_handle(Dir) of
+ true -> ok;
+ false -> ensure_dir_internal(Dir),
+ prim_file:make_dir(Dir)
+ end.
+
+-spec wildcard(string(), file:filename()) -> [file:filename()].
+
+wildcard(Pattern, Dir) ->
+ case list_dir(Dir) of
+ {ok, Files} -> {ok, RE} = re:compile(Pattern, [anchored]),
+ [File || File <- Files,
+ match =:= re:run(File, RE, [{capture, none}])];
+ {error, _} -> []
+ end.
+
+-spec list_dir(file:filename()) ->
+ rabbit_types:ok_or_error2([file:filename()], any()).
+
+list_dir(Dir) -> with_handle(fun () -> prim_file:list_dir(Dir) end).
+
+read_file_info(File) ->
+ with_handle(fun () -> prim_file:read_file_info(File) end).
+
+-spec read_term_file
+ (file:filename()) -> {'ok', [any()]} | rabbit_types:error(any()).
+
+read_term_file(File) ->
+ try
+ {ok, Data} = with_handle(fun () -> prim_file:read_file(File) end),
+ {ok, Tokens, _} = erl_scan:string(binary_to_list(Data)),
+ TokenGroups = group_tokens(Tokens),
+ {ok, [begin
+ {ok, Term} = erl_parse:parse_term(Tokens1),
+ Term
+ end || Tokens1 <- TokenGroups]}
+ catch
+ error:{badmatch, Error} -> Error
+ end.
+
+group_tokens(Ts) -> [lists:reverse(G) || G <- group_tokens([], Ts)].
+
+group_tokens([], []) -> [];
+group_tokens(Cur, []) -> [Cur];
+group_tokens(Cur, [T = {dot, _} | Ts]) -> [[T | Cur] | group_tokens([], Ts)];
+group_tokens(Cur, [T | Ts]) -> group_tokens([T | Cur], Ts).
+
+-spec write_term_file(file:filename(), [any()]) -> ok_or_error().
+
+write_term_file(File, Terms) ->
+ write_file(File, list_to_binary([io_lib:format("~w.~n", [Term]) ||
+ Term <- Terms])).
+
+-spec write_file(file:filename(), iodata()) -> ok_or_error().
+
+write_file(Path, Data) -> write_file(Path, Data, []).
+
+-spec write_file(file:filename(), iodata(), [any()]) -> ok_or_error().
+
+write_file(Path, Data, Modes) ->
+ Modes1 = [binary, write | (Modes -- [binary, write])],
+ case make_binary(Data) of
+ Bin when is_binary(Bin) -> write_file1(Path, Bin, Modes1);
+ {error, _} = E -> E
+ end.
+
+%% make_binary/1 is based on the corresponding function in the
+%% kernel/file.erl module of the Erlang R14B02 release, which is
+%% licensed under the EPL.
+
+make_binary(Bin) when is_binary(Bin) ->
+ Bin;
+make_binary(List) ->
+ try
+ iolist_to_binary(List)
+ catch error:Reason ->
+ {error, Reason}
+ end.
+
+write_file1(Path, Bin, Modes) ->
+ try
+ with_synced_copy(Path, Modes,
+ fun (Hdl) ->
+ ok = prim_file:write(Hdl, Bin)
+ end)
+ catch
+ error:{badmatch, Error} -> Error;
+ _:{error, Error} -> {error, Error}
+ end.
+
+with_synced_copy(Path, Modes, Fun) ->
+ case lists:member(append, Modes) of
+ true ->
+ {error, append_not_supported, Path};
+ false ->
+ with_handle(
+ fun () ->
+ Bak = Path ++ ?TMP_EXT,
+ case prim_file:open(Bak, Modes) of
+ {ok, Hdl} ->
+ try
+ Result = Fun(Hdl),
+ ok = prim_file:sync(Hdl),
+ ok = prim_file:rename(Bak, Path),
+ Result
+ after
+ prim_file:close(Hdl)
+ end;
+ {error, _} = E -> E
+ end
+ end)
+ end.
+
+%% TODO the semantics of this function are rather odd. But see bug 25021.
+
+-spec append_file(file:filename(), string()) -> ok_or_error().
+
+append_file(File, Suffix) ->
+ case read_file_info(File) of
+ {ok, FInfo} -> append_file(File, FInfo#file_info.size, Suffix);
+ {error, enoent} -> append_file(File, 0, Suffix);
+ Error -> Error
+ end.
+
+append_file(_, _, "") ->
+ ok;
+append_file(File, 0, Suffix) ->
+ with_handle(fun () ->
+ case prim_file:open([File, Suffix], [append]) of
+ {ok, Fd} -> prim_file:close(Fd);
+ Error -> Error
+ end
+ end);
+append_file(File, _, Suffix) ->
+ case with_handle(2, fun () ->
+ file:copy(File, {[File, Suffix], [append]})
+ end) of
+ {ok, _BytesCopied} -> ok;
+ Error -> Error
+ end.
+
+-spec ensure_parent_dirs_exist(string()) -> 'ok'.
+
+ensure_parent_dirs_exist(Filename) ->
+ case ensure_dir(Filename) of
+ ok -> ok;
+ {error, Reason} ->
+ throw({error, {cannot_create_parent_dirs, Filename, Reason}})
+ end.
+
+-spec rename(file:filename(), file:filename()) -> ok_or_error().
+
+rename(Old, New) -> with_handle(fun () -> prim_file:rename(Old, New) end).
+
+-spec delete([file:filename()]) -> ok_or_error().
+
+delete(File) -> with_handle(fun () -> prim_file:delete(File) end).
+
+-spec recursive_delete([file:filename()]) ->
+ rabbit_types:ok_or_error({file:filename(), any()}).
+
+recursive_delete(Files) ->
+ with_handle(
+ fun () -> lists:foldl(fun (Path, ok) -> recursive_delete1(Path);
+ (_Path, {error, _Err} = Error) -> Error
+ end, ok, Files)
+ end).
+
+recursive_delete1(Path) ->
+ case is_dir_no_handle(Path) and not(is_symlink_no_handle(Path)) of
+ false -> case prim_file:delete(Path) of
+ ok -> ok;
+ {error, enoent} -> ok; %% Path doesn't exist anyway
+ {error, Err} -> {error, {Path, Err}}
+ end;
+ true -> case prim_file:list_dir(Path) of
+ {ok, FileNames} ->
+ case lists:foldl(
+ fun (FileName, ok) ->
+ recursive_delete1(
+ filename:join(Path, FileName));
+ (_FileName, Error) ->
+ Error
+ end, ok, FileNames) of
+ ok ->
+ case prim_file:del_dir(Path) of
+ ok -> ok;
+ {error, Err} -> {error, {Path, Err}}
+ end;
+ {error, _Err} = Error ->
+ Error
+ end;
+ {error, Err} ->
+ {error, {Path, Err}}
+ end
+ end.
+
+is_symlink_no_handle(File) ->
+ case prim_file:read_link(File) of
+ {ok, _} -> true;
+ _ -> false
+ end.
+
+-spec recursive_copy(file:filename(), file:filename()) ->
+ rabbit_types:ok_or_error({file:filename(), file:filename(), any()}).
+
+recursive_copy(Src, Dest) ->
+ %% Note that this uses the 'file' module and, hence, shouldn't be
+ %% run on many processes at once.
+ case is_dir(Src) of
+ false -> case file:copy(Src, Dest) of
+ {ok, _Bytes} -> ok;
+ {error, enoent} -> ok; %% Path doesn't exist anyway
+ {error, Err} -> {error, {Src, Dest, Err}}
+ end;
+ true -> case file:list_dir(Src) of
+ {ok, FileNames} ->
+ case file:make_dir(Dest) of
+ ok ->
+ lists:foldl(
+ fun (FileName, ok) ->
+ recursive_copy(
+ filename:join(Src, FileName),
+ filename:join(Dest, FileName));
+ (_FileName, Error) ->
+ Error
+ end, ok, FileNames);
+ {error, Err} ->
+ {error, {Src, Dest, Err}}
+ end;
+ {error, Err} ->
+ {error, {Src, Dest, Err}}
+ end
+ end.
+
+%% TODO: When we stop supporting Erlang prior to R14, this should be
+%% replaced with file:open [write, exclusive]
+
+-spec lock_file(file:filename()) -> rabbit_types:ok_or_error('eexist').
+
+lock_file(Path) ->
+ case is_file(Path) of
+ true -> {error, eexist};
+ false -> with_handle(
+ fun () -> {ok, Lock} = prim_file:open(Path, [write]),
+ ok = prim_file:close(Lock)
+ end)
+ end.
+
+-spec filename_as_a_directory(file:filename()) -> file:filename().
+
+filename_as_a_directory(FileName) ->
+ case lists:last(FileName) of
+ "/" ->
+ FileName;
+ _ ->
+ FileName ++ "/"
+ end.
diff --git a/deps/rabbit/src/rabbit_framing.erl b/deps/rabbit/src/rabbit_framing.erl
new file mode 100644
index 0000000000..42927b2b68
--- /dev/null
+++ b/deps/rabbit/src/rabbit_framing.erl
@@ -0,0 +1,36 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% TODO auto-generate
+
+-module(rabbit_framing).
+
+-export_type([protocol/0,
+ amqp_field_type/0, amqp_property_type/0,
+ amqp_table/0, amqp_array/0, amqp_value/0,
+ amqp_method_name/0, amqp_method/0, amqp_method_record/0,
+ amqp_method_field_name/0, amqp_property_record/0,
+ amqp_exception/0, amqp_exception_code/0, amqp_class_id/0]).
+
+-type protocol() :: 'rabbit_framing_amqp_0_8' | 'rabbit_framing_amqp_0_9_1'.
+
+-define(protocol_type(T), type(T :: rabbit_framing_amqp_0_8:T |
+ rabbit_framing_amqp_0_9_1:T)).
+
+-?protocol_type(amqp_field_type()).
+-?protocol_type(amqp_property_type()).
+-?protocol_type(amqp_table()).
+-?protocol_type(amqp_array()).
+-?protocol_type(amqp_value()).
+-?protocol_type(amqp_method_name()).
+-?protocol_type(amqp_method()).
+-?protocol_type(amqp_method_record()).
+-?protocol_type(amqp_method_field_name()).
+-?protocol_type(amqp_property_record()).
+-?protocol_type(amqp_exception()).
+-?protocol_type(amqp_exception_code()).
+-?protocol_type(amqp_class_id()).
diff --git a/deps/rabbit/src/rabbit_guid.erl b/deps/rabbit/src/rabbit_guid.erl
new file mode 100644
index 0000000000..01e6464332
--- /dev/null
+++ b/deps/rabbit/src/rabbit_guid.erl
@@ -0,0 +1,181 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_guid).
+
+-behaviour(gen_server).
+
+-export([start_link/0]).
+-export([filename/0]).
+-export([gen/0, gen_secure/0, string/2, binary/2, to_string/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+-define(SERVER, ?MODULE).
+-define(SERIAL_FILENAME, "rabbit_serial").
+
+-record(state, {serial}).
+
+%%----------------------------------------------------------------------------
+
+-export_type([guid/0]).
+
+-type guid() :: binary().
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+start_link() ->
+ gen_server:start_link({local, ?SERVER}, ?MODULE,
+ [update_disk_serial()], []).
+
+%% We use this to detect a (possibly rather old) Mnesia directory,
+%% since it has existed since at least 1.7.0 (as far back as I cared
+%% to go).
+
+-spec filename() -> string().
+
+filename() ->
+ filename:join(rabbit_mnesia:dir(), ?SERIAL_FILENAME).
+
+update_disk_serial() ->
+ Filename = filename(),
+ Serial = case rabbit_file:read_term_file(Filename) of
+ {ok, [Num]} -> Num;
+ {ok, []} -> 0; %% [1]
+ {error, enoent} -> 0;
+ {error, Reason} ->
+ throw({error, {cannot_read_serial_file, Filename, Reason}})
+ end,
+ case rabbit_file:write_term_file(Filename, [Serial + 1]) of
+ ok -> ok;
+ {error, Reason1} ->
+ throw({error, {cannot_write_serial_file, Filename, Reason1}})
+ end,
+ Serial.
+%% [1] a couple of users have reported startup failures due to the
+%% file being empty, presumably as a result of filesystem
+%% corruption. While rabbit doesn't cope with that in general, in this
+%% specific case we can be more accommodating.
+
+%% Generate an un-hashed guid.
+fresh() ->
+ %% We don't use erlang:now() here because a) it may return
+ %% duplicates when the system clock has been rewound prior to a
+ %% restart, or ids were generated at a high rate (which causes
+ %% now() to move ahead of the system time), and b) it is really
+ %% slow since it takes a global lock and makes a system call.
+ %%
+ %% A persisted serial number, the node, and a unique reference
+ %% (per node incarnation) uniquely identifies a process in space
+ %% and time.
+ Serial = gen_server:call(?SERVER, serial, infinity),
+ {Serial, node(), make_ref()}.
+
+advance_blocks({B1, B2, B3, B4}, I) ->
+ %% To produce a new set of blocks, we create a new 32bit block
+ %% hashing {B5, I}. The new hash is used as last block, and the
+ %% other three blocks are XORed with it.
+ %%
+ %% Doing this is convenient because it avoids cascading conflicts,
+ %% while being very fast. The conflicts are avoided by propagating
+ %% the changes through all the blocks at each round by XORing, so
+ %% the only occasion in which a collision will take place is when
+ %% all 4 blocks are the same and the counter is the same.
+ %%
+ %% The range (2^32) is provided explicitly since phash uses 2^27
+ %% by default.
+ B5 = erlang:phash2({B1, I}, 4294967296),
+ {{(B2 bxor B5), (B3 bxor B5), (B4 bxor B5), B5}, I+1}.
+
+%% generate a GUID. This function should be used when performance is a
+%% priority and predictability is not an issue. Otherwise use
+%% gen_secure/0.
+
+-spec gen() -> guid().
+
+gen() ->
+ %% We hash a fresh GUID with md5, split it in 4 blocks, and each
+ %% time we need a new guid we rotate them producing a new hash
+ %% with the aid of the counter. Look at the comments in
+ %% advance_blocks/2 for details.
+ case get(guid) of
+ undefined -> <<B1:32, B2:32, B3:32, B4:32>> = Res =
+ erlang:md5(term_to_binary(fresh())),
+ put(guid, {{B1, B2, B3, B4}, 0}),
+ Res;
+ {BS, I} -> {{B1, B2, B3, B4}, _} = S = advance_blocks(BS, I),
+ put(guid, S),
+ <<B1:32, B2:32, B3:32, B4:32>>
+ end.
+
+%% generate a non-predictable GUID.
+%%
+%% The id is only unique within a single cluster and as long as the
+%% serial store hasn't been deleted.
+%%
+%% If you are not concerned with predictability, gen/0 is faster.
+
+-spec gen_secure() -> guid().
+
+gen_secure() ->
+ %% Here instead of hashing once we hash the GUID and the counter
+ %% each time, so that the GUID is not predictable.
+ G = case get(guid_secure) of
+ undefined -> {fresh(), 0};
+ {S, I} -> {S, I+1}
+ end,
+ put(guid_secure, G),
+ erlang:md5(term_to_binary(G)).
+
+%% generate a readable string representation of a GUID.
+%%
+%% employs base64url encoding, which is safer in more contexts than
+%% plain base64.
+
+-spec string(guid() | string(), any()) -> string().
+
+string(G, Prefix) when is_list(Prefix) ->
+ Prefix ++ "-" ++ rabbit_misc:base64url(G);
+string(G, Prefix) when is_binary(Prefix) ->
+ binary_to_list(Prefix) ++ "-" ++ rabbit_misc:base64url(G).
+
+-spec binary(guid() | string(), any()) -> binary().
+
+binary(G, Prefix) ->
+ list_to_binary(string(G, Prefix)).
+
+%% copied from https://stackoverflow.com/questions/1657204/erlang-uuid-generator
+to_string(<<TL:32, TM:16, THV:16, CSR:8, CSL:8, N:48>>) ->
+ lists:flatten(
+ io_lib:format("~8.16.0b-~4.16.0b-~4.16.0b-~2.16.0b~2.16.0b-~12.16.0b",
+ [TL, TM, THV, CSR, CSL, N])).
+
+%%----------------------------------------------------------------------------
+
+init([Serial]) ->
+ {ok, #state{serial = Serial}}.
+
+handle_call(serial, _From, State = #state{serial = Serial}) ->
+ {reply, Serial, State};
+
+handle_call(_Request, _From, State) ->
+ {noreply, State}.
+
+handle_cast(_Msg, State) ->
+ {noreply, State}.
+
+handle_info(_Info, State) ->
+ {noreply, State}.
+
+terminate(_Reason, _State) ->
+ ok.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
diff --git a/deps/rabbit/src/rabbit_health_check.erl b/deps/rabbit/src/rabbit_health_check.erl
new file mode 100644
index 0000000000..4674ca7d8e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_health_check.erl
@@ -0,0 +1,80 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+-module(rabbit_health_check).
+
+%% External API
+-export([node/1, node/2]).
+
+%% Internal API
+-export([local/0]).
+
+%%----------------------------------------------------------------------------
+%% External functions
+%%----------------------------------------------------------------------------
+
+-spec node(node(), timeout()) -> ok | {badrpc, term()} | {error_string, string()}.
+
+node(Node) ->
+ %% same default as in CLI
+ node(Node, 70000).
+node(Node, Timeout) ->
+ rabbit_misc:rpc_call(Node, rabbit_health_check, local, [], Timeout).
+
+-spec local() -> ok | {error_string, string()}.
+
+local() ->
+ rabbit_log:warning("rabbitmqctl node_health_check and its HTTP API counterpart are DEPRECATED. "
+ "See https://www.rabbitmq.com/monitoring.html#health-checks for replacement options."),
+ run_checks([list_channels, list_queues, alarms, rabbit_node_monitor]).
+
+%%----------------------------------------------------------------------------
+%% Internal functions
+%%----------------------------------------------------------------------------
+run_checks([]) ->
+ ok;
+run_checks([C|Cs]) ->
+ case node_health_check(C) of
+ ok ->
+ run_checks(Cs);
+ Error ->
+ Error
+ end.
+
+node_health_check(list_channels) ->
+ case rabbit_channel:info_local([pid]) of
+ L when is_list(L) ->
+ ok
+ end;
+
+node_health_check(list_queues) ->
+ health_check_queues(rabbit_vhost:list_names());
+
+node_health_check(rabbit_node_monitor) ->
+ case rabbit_node_monitor:partitions() of
+ [] ->
+ ok;
+ L when is_list(L), length(L) > 0 ->
+ ErrorMsg = io_lib:format("cluster partition in effect: ~p", [L]),
+ {error_string, ErrorMsg}
+ end;
+
+node_health_check(alarms) ->
+ case proplists:get_value(alarms, rabbit:status()) of
+ [] ->
+ ok;
+ Alarms ->
+ ErrorMsg = io_lib:format("resource alarm(s) in effect:~p", [Alarms]),
+ {error_string, ErrorMsg}
+ end.
+
+health_check_queues([]) ->
+ ok;
+health_check_queues([VHost|RestVHosts]) ->
+ case rabbit_amqqueue:info_local(VHost) of
+ L when is_list(L) ->
+ health_check_queues(RestVHosts)
+ end.
diff --git a/deps/rabbit/src/rabbit_lager.erl b/deps/rabbit/src/rabbit_lager.erl
new file mode 100644
index 0000000000..3cbc5e431d
--- /dev/null
+++ b/deps/rabbit/src/rabbit_lager.erl
@@ -0,0 +1,723 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_lager).
+
+-include_lib("rabbit_common/include/rabbit_log.hrl").
+
+%% API
+-export([start_logger/0, log_locations/0, fold_sinks/2,
+ broker_is_started/0, set_log_level/1]).
+
+%% For test purposes
+-export([configure_lager/0]).
+
+-export_type([log_location/0]).
+
+-type log_location() :: string().
+
+start_logger() ->
+ ok = maybe_remove_logger_handler(),
+ ok = app_utils:stop_applications([lager, syslog]),
+ ok = ensure_lager_configured(),
+ ok = app_utils:start_applications([lager]),
+ fold_sinks(
+ fun
+ (_, [], Acc) ->
+ Acc;
+ (SinkName, _, Acc) ->
+ lager:log(SinkName, info, self(),
+ "Log file opened with Lager", []),
+ Acc
+ end, ok),
+ ensure_log_working().
+
+broker_is_started() ->
+ {ok, HwmCurrent} = application:get_env(lager, error_logger_hwm),
+ {ok, HwmOrig0} = application:get_env(lager, error_logger_hwm_original),
+ HwmOrig = case get_most_verbose_log_level() of
+ debug -> HwmOrig0 * 100;
+ _ -> HwmOrig0
+ end,
+ case HwmOrig =:= HwmCurrent of
+ false ->
+ ok = application:set_env(lager, error_logger_hwm, HwmOrig),
+ Handlers = gen_event:which_handlers(lager_event),
+ lists:foreach(fun(Handler) ->
+ lager:set_loghwm(Handler, HwmOrig)
+ end, Handlers),
+ ok;
+ _ ->
+ ok
+ end.
+
+set_log_level(Level) ->
+ IsValidLevel = lists:member(Level, lager_util:levels()),
+ set_log_level(IsValidLevel, Level).
+
+set_log_level(true, Level) ->
+ SinksAndHandlers = [{Sink, gen_event:which_handlers(Sink)} ||
+ Sink <- lager:list_all_sinks()],
+ DefaultHwm = application:get_env(lager, error_logger_hwm_original, 50),
+ Hwm = case Level of
+ debug -> DefaultHwm * 100;
+ _ -> DefaultHwm
+ end,
+ application:set_env(lager, error_logger_hwm, Hwm),
+ set_sink_log_level(SinksAndHandlers, Level, Hwm);
+set_log_level(_, Level) ->
+ {error, {invalid_log_level, Level}}.
+
+set_sink_log_level([], _Level, _Hwm) ->
+ ok;
+set_sink_log_level([{Sink, Handlers}|Rest], Level, Hwm) ->
+ set_sink_handler_log_level(Sink, Handlers, Level, Hwm),
+ set_sink_log_level(Rest, Level, Hwm).
+
+set_sink_handler_log_level(_Sink, [], _Level, _Hwm) ->
+ ok;
+set_sink_handler_log_level(Sink, [Handler|Rest], Level, Hwm)
+ when is_atom(Handler) andalso is_integer(Hwm) ->
+ lager:set_loghwm(Sink, Handler, undefined, Hwm),
+ ok = lager:set_loglevel(Sink, Handler, undefined, Level),
+ set_sink_handler_log_level(Sink, Rest, Level, Hwm);
+set_sink_handler_log_level(Sink, [{Handler, Id}|Rest], Level, Hwm) ->
+ lager:set_loghwm(Sink, Handler, Id, Hwm),
+ ok = lager:set_loglevel(Sink, Handler, Id, Level),
+ set_sink_handler_log_level(Sink, Rest, Level, Hwm);
+set_sink_handler_log_level(Sink, [_|Rest], Level, Hwm) ->
+ set_sink_handler_log_level(Sink, Rest, Level, Hwm).
+
+log_locations() ->
+ ensure_lager_configured(),
+ DefaultHandlers = application:get_env(lager, handlers, []),
+ Sinks = application:get_env(lager, extra_sinks, []),
+ ExtraHandlers = [proplists:get_value(handlers, Props, [])
+ || {_, Props} <- Sinks],
+ lists:sort(log_locations1([DefaultHandlers | ExtraHandlers], [])).
+
+log_locations1([Handlers | Rest], Locations) ->
+ Locations1 = log_locations2(Handlers, Locations),
+ log_locations1(Rest, Locations1);
+log_locations1([], Locations) ->
+ Locations.
+
+log_locations2([{lager_file_backend, Settings} | Rest], Locations) ->
+ FileName = lager_file_name1(Settings),
+ Locations1 = case lists:member(FileName, Locations) of
+ false -> [FileName | Locations];
+ true -> Locations
+ end,
+ log_locations2(Rest, Locations1);
+log_locations2([{lager_console_backend, _} | Rest], Locations) ->
+ Locations1 = case lists:member("<stdout>", Locations) of
+ false -> ["<stdout>" | Locations];
+ true -> Locations
+ end,
+ log_locations2(Rest, Locations1);
+log_locations2([_ | Rest], Locations) ->
+ log_locations2(Rest, Locations);
+log_locations2([], Locations) ->
+ Locations.
+
+fold_sinks(Fun, Acc) ->
+ Handlers = lager_config:global_get(handlers),
+ Sinks = dict:to_list(lists:foldl(
+ fun
+ ({{lager_file_backend, F}, _, S}, Dict) ->
+ dict:append(S, F, Dict);
+ ({_, _, S}, Dict) ->
+ case dict:is_key(S, Dict) of
+ true -> dict:store(S, [], Dict);
+ false -> Dict
+ end
+ end,
+ dict:new(), Handlers)),
+ fold_sinks(Sinks, Fun, Acc).
+
+fold_sinks([{SinkName, FileNames} | Rest], Fun, Acc) ->
+ Acc1 = Fun(SinkName, FileNames, Acc),
+ fold_sinks(Rest, Fun, Acc1);
+fold_sinks([], _, Acc) ->
+ Acc.
+
+ensure_log_working() ->
+ {ok, Handlers} = application:get_env(lager, handlers),
+ [ ensure_lager_handler_file_exist(Handler)
+ || Handler <- Handlers ],
+ Sinks = application:get_env(lager, extra_sinks, []),
+ ensure_extra_sinks_working(Sinks, list_expected_sinks()).
+
+ensure_extra_sinks_working(Sinks, [SinkName | Rest]) ->
+ case proplists:get_value(SinkName, Sinks) of
+ undefined -> throw({error, {cannot_log_to_file, unknown,
+ rabbit_log_lager_event_sink_undefined}});
+ Sink ->
+ SinkHandlers = proplists:get_value(handlers, Sink, []),
+ [ ensure_lager_handler_file_exist(Handler)
+ || Handler <- SinkHandlers ]
+ end,
+ ensure_extra_sinks_working(Sinks, Rest);
+ensure_extra_sinks_working(_Sinks, []) ->
+ ok.
+
+ensure_lager_handler_file_exist(Handler) ->
+ case lager_file_name(Handler) of
+ false -> ok;
+ FileName -> ensure_logfile_exist(FileName)
+ end.
+
+lager_file_name({lager_file_backend, Settings}) ->
+ lager_file_name1(Settings);
+lager_file_name(_) ->
+ false.
+
+lager_file_name1(Settings) when is_list(Settings) ->
+ {file, FileName} = proplists:lookup(file, Settings),
+ lager_util:expand_path(FileName);
+lager_file_name1({FileName, _}) -> lager_util:expand_path(FileName);
+lager_file_name1({FileName, _, _, _, _}) -> lager_util:expand_path(FileName);
+lager_file_name1(_) ->
+ throw({error, {cannot_log_to_file, unknown,
+ lager_file_backend_config_invalid}}).
+
+
+ensure_logfile_exist(FileName) ->
+ LogFile = lager_util:expand_path(FileName),
+ case rabbit_file:read_file_info(LogFile) of
+ {ok,_} -> ok;
+ {error, Err} -> throw({error, {cannot_log_to_file, LogFile, Err}})
+ end.
+
+ensure_lager_configured() ->
+ case lager_configured() of
+ false -> configure_lager();
+ true -> ok
+ end.
+
+%% Lager should have handlers and sinks
+%% Error logger forwarding to syslog should be disabled
+lager_configured() ->
+ Sinks = lager:list_all_sinks(),
+ ExpectedSinks = list_expected_sinks(),
+ application:get_env(lager, handlers) =/= undefined
+ andalso
+ lists:all(fun(S) -> lists:member(S, Sinks) end, ExpectedSinks)
+ andalso
+ application:get_env(syslog, syslog_error_logger) =/= undefined.
+
+configure_lager() ->
+ ok = app_utils:load_applications([lager]),
+ %% Turn off reformatting for error_logger messages
+ case application:get_env(lager, error_logger_redirect) of
+ undefined -> application:set_env(lager, error_logger_redirect, true);
+ _ -> ok
+ end,
+ case application:get_env(lager, error_logger_format_raw) of
+ undefined -> application:set_env(lager, error_logger_format_raw, true);
+ _ -> ok
+ end,
+ case application:get_env(lager, log_root) of
+ undefined ->
+ %% Setting env var to 'undefined' is different from not
+ %% setting it at all, and lager is sensitive to this
+ %% difference.
+ case application:get_env(rabbit, lager_log_root) of
+ {ok, Value} ->
+ ok = application:set_env(lager, log_root, Value);
+ _ ->
+ ok
+ end;
+ _ -> ok
+ end,
+ case application:get_env(lager, colored) of
+ undefined ->
+ UseColor = rabbit_prelaunch_early_logging:use_colored_logging(),
+ application:set_env(lager, colored, UseColor);
+ _ ->
+ ok
+ end,
+ %% Set rabbit.log config variable based on environment.
+ prepare_rabbit_log_config(),
+ %% Configure syslog library.
+ ok = configure_syslog_error_logger(),
+ %% At this point we should have rabbit.log application variable
+ %% configured to generate RabbitMQ log handlers.
+ GeneratedHandlers = generate_lager_handlers(),
+
+ %% If there are lager handlers configured,
+ %% both lager and generate RabbitMQ handlers are used.
+ %% This is because it's hard to decide clear preference rules.
+ %% RabbitMQ handlers can be set to [] to use only lager handlers.
+ Handlers = case application:get_env(lager, handlers, undefined) of
+ undefined -> GeneratedHandlers;
+ LagerHandlers ->
+ %% Remove handlers generated in previous starts
+ FormerRabbitHandlers = application:get_env(lager, rabbit_handlers, []),
+ GeneratedHandlers ++ remove_rabbit_handlers(LagerHandlers,
+ FormerRabbitHandlers)
+ end,
+
+ ok = application:set_env(lager, handlers, Handlers),
+ ok = application:set_env(lager, rabbit_handlers, GeneratedHandlers),
+
+ %% Setup extra sink/handlers. If they are not configured, redirect
+ %% messages to the default sink. To know the list of expected extra
+ %% sinks, we look at the 'lager_extra_sinks' compilation option.
+ LogConfig = application:get_env(rabbit, log, []),
+ LogLevels = application:get_env(rabbit, log_levels, []),
+ Categories = proplists:get_value(categories, LogConfig, []),
+ CategoriesConfig0 = case {Categories, LogLevels} of
+ {[], []} -> [];
+ {[], LogLevels} ->
+ io:format("Using deprecated config parameter 'log_levels'. "
+ "Please update your configuration file according to "
+ "https://rabbitmq.com/logging.html"),
+ lists:map(fun({Name, Level}) -> {Name, [{level, Level}]} end,
+ LogLevels);
+ {Categories, []} ->
+ Categories;
+ {Categories, _} ->
+ io:format("Using the deprecated config parameter 'rabbit.log_levels' together "
+ "with a new parameter for log categories."
+ " 'rabbit.log_levels' will be ignored. Please remove it from the config. More at "
+ "https://rabbitmq.com/logging.html"),
+ Categories
+ end,
+ LogLevelsFromContext = case rabbit_prelaunch:get_context() of
+ #{log_levels := LL} -> LL;
+ _ -> undefined
+ end,
+ Fun = fun
+ (global, _, CC) ->
+ CC;
+ (color, _, CC) ->
+ CC;
+ (CategoryS, LogLevel, CC) ->
+ Category = list_to_atom(CategoryS),
+ CCEntry = proplists:get_value(
+ Category, CC, []),
+ CCEntry1 = lists:ukeymerge(
+ 1,
+ [{level, LogLevel}],
+ lists:ukeysort(1, CCEntry)),
+ lists:keystore(
+ Category, 1, CC, {Category, CCEntry1})
+ end,
+ CategoriesConfig = case LogLevelsFromContext of
+ undefined ->
+ CategoriesConfig0;
+ _ ->
+ maps:fold(Fun,
+ CategoriesConfig0,
+ LogLevelsFromContext)
+ end,
+ SinkConfigs = lists:map(
+ fun({Name, Config}) ->
+ {rabbit_log:make_internal_sink_name(Name), Config}
+ end,
+ CategoriesConfig),
+ LagerSinks = application:get_env(lager, extra_sinks, []),
+ GeneratedSinks = generate_lager_sinks(
+ [error_logger_lager_event | list_expected_sinks()],
+ SinkConfigs),
+ Sinks = merge_lager_sink_handlers(LagerSinks, GeneratedSinks, []),
+ ok = application:set_env(lager, extra_sinks, Sinks),
+
+ case application:get_env(lager, error_logger_hwm) of
+ undefined ->
+ ok = application:set_env(lager, error_logger_hwm, 1000),
+ % NB: 50 is the default value in lager.app.src
+ ok = application:set_env(lager, error_logger_hwm_original, 50);
+ {ok, Val} when is_integer(Val) andalso Val < 1000 ->
+ ok = application:set_env(lager, error_logger_hwm, 1000),
+ ok = application:set_env(lager, error_logger_hwm_original, Val);
+ {ok, Val} when is_integer(Val) ->
+ ok = application:set_env(lager, error_logger_hwm_original, Val),
+ ok
+ end,
+ ok.
+
+configure_syslog_error_logger() ->
+ %% Disable error_logger forwarding to syslog if it's not configured
+ case application:get_env(syslog, syslog_error_logger) of
+ undefined ->
+ application:set_env(syslog, syslog_error_logger, false);
+ _ -> ok
+ end.
+
+remove_rabbit_handlers(Handlers, FormerHandlers) ->
+ lists:filter(fun(Handler) ->
+ not lists:member(Handler, FormerHandlers)
+ end,
+ Handlers).
+
+generate_lager_handlers() ->
+ LogConfig = application:get_env(rabbit, log, []),
+ LogHandlersConfig = lists:keydelete(categories, 1, LogConfig),
+ generate_lager_handlers(LogHandlersConfig).
+
+generate_lager_handlers(LogHandlersConfig) ->
+ lists:flatmap(
+ fun
+ ({file, HandlerConfig}) ->
+ case proplists:get_value(file, HandlerConfig, false) of
+ false -> [];
+ FileName when is_list(FileName) ->
+ Backend = lager_backend(file),
+ generate_handler(Backend, HandlerConfig)
+ end;
+ ({Other, HandlerConfig}) when
+ Other =:= console; Other =:= syslog; Other =:= exchange ->
+ case proplists:get_value(enabled, HandlerConfig, false) of
+ false -> [];
+ true ->
+ Backend = lager_backend(Other),
+ generate_handler(Backend,
+ lists:keydelete(enabled, 1, HandlerConfig))
+ end
+ end,
+ LogHandlersConfig).
+
+lager_backend(file) -> lager_file_backend;
+lager_backend(console) -> lager_console_backend;
+lager_backend(syslog) -> syslog_lager_backend;
+lager_backend(exchange) -> lager_exchange_backend.
+
+%% Syslog backend is using an old API for configuration and
+%% does not support proplists.
+generate_handler(syslog_lager_backend=Backend, HandlerConfig) ->
+    %% The default log level is set to `debug` because the actual
+    %% filtering is made at the sink level. We want to accept all
+    %% messages here.
+    DefaultConfigVal = debug,
+    Level = proplists:get_value(level, HandlerConfig, DefaultConfigVal),
+    ok = configure_handler_backend(Backend),
+    [{Backend,
+        [Level,
+         %% NOTE(review): `{}` appears to be a positional placeholder in
+         %% syslog_lager_backend's old-style argument list (facility/
+         %% identity slot) — confirm against syslog_lager_backend docs.
+         {},
+         {lager_default_formatter, syslog_formatter_config()}]}];
+%% All other backends take a proplist; user-supplied settings win over
+%% the backend's defaults (ukeymerge keeps the first list's entries).
+generate_handler(Backend, HandlerConfig) ->
+    [{Backend,
+        lists:ukeymerge(1, lists:ukeysort(1, HandlerConfig),
+                           lists:ukeysort(1, default_handler_config(Backend)))}].
+
+%% The syslog backend needs its application started before use; other
+%% backends require no extra setup.
+configure_handler_backend(syslog_lager_backend) ->
+    {ok, _} = application:ensure_all_started(syslog),
+    ok;
+configure_handler_backend(_Backend) ->
+    ok.
+
+%% Per-backend default settings merged under the user's handler config.
+default_handler_config(lager_console_backend) ->
+    %% The default log level is set to `debug` because the actual
+    %% filtering is made at the sink level. We want to accept all
+    %% messages here.
+    DefaultConfigVal = debug,
+    [{level, DefaultConfigVal},
+     {formatter_config, default_config_value({formatter_config, console})}];
+default_handler_config(lager_exchange_backend) ->
+    %% The default log level is set to `debug` because the actual
+    %% filtering is made at the sink level. We want to accept all
+    %% messages here.
+    DefaultConfigVal = debug,
+    [{level, DefaultConfigVal},
+     {formatter_config, default_config_value({formatter_config, exchange})}];
+default_handler_config(lager_file_backend) ->
+    %% The default log level is set to `debug` because the actual
+    %% filtering is made at the sink level. We want to accept all
+    %% messages here.
+    DefaultConfigVal = debug,
+    [{level, DefaultConfigVal},
+     {formatter_config, default_config_value({formatter_config, file})},
+     %% date = "" and size = 0 disable time- and size-based rotation.
+     {date, ""},
+     {size, 0}].
+
+%% default_config_value(level): scan every output's config in the rabbit
+%% 'log' environment and return the most verbose level found (highest
+%% lager_util:level_to_num/1), defaulting to 'info' when none is set.
+default_config_value(level) ->
+    LogConfig = application:get_env(rabbit, log, []),
+    FoldFun = fun
+        ({_, Cfg}, LL) when is_list(Cfg) ->
+            NewLL = proplists:get_value(level, Cfg, LL),
+            case LL of
+                undefined ->
+                    NewLL;
+                _ ->
+                    MoreVerbose = lager_util:level_to_num(NewLL) > lager_util:level_to_num(LL),
+                    case MoreVerbose of
+                        true  -> NewLL;
+                        false -> LL
+                    end
+            end;
+        (_, LL) ->
+            LL
+    end,
+    FoundLL = lists:foldl(FoldFun, undefined, LogConfig),
+    case FoundLL of
+        undefined -> info;
+        _         -> FoundLL
+    end;
+%% Console formatter: reset terminal colour at end-of-line when lager's
+%% colored output is enabled, and use CRLF line endings.
+default_config_value({formatter_config, console}) ->
+    EOL = case application:get_env(lager, colored) of
+        {ok, true}  -> "\e[0m\r\n";
+        _           -> "\r\n"
+    end,
+    [date, " ", time, " ", color, "[", severity, "] ",
+       {pid, ""},
+       " ", message, EOL];
+%% File/exchange formatters: same layout with a plain LF line ending.
+default_config_value({formatter_config, _}) ->
+    [date, " ", time, " ", color, "[", severity, "] ",
+       {pid, ""},
+       " ", message, "\n"].
+
+%% Formatter for the syslog backend: no date/time fields, since syslog
+%% itself timestamps each entry.
+syslog_formatter_config() ->
+    [color, "[", severity, "] ",
+       {pid, ""},
+       " ", message, "\n"].
+
+%% Apply the log destinations derived from $RABBITMQ_LOGS and
+%% $RABBITMQ_UPGRADE_LOGS (already stored as lager_default_file /
+%% lager_upgrade_file) onto the rabbit 'log' environment:
+%%   undefined -> leave config as-is; false -> disable all logging;
+%%   tty -> console only; a filename -> file output (overriding
+%%   rabbitmq.config only when the env var was set explicitly).
+prepare_rabbit_log_config() ->
+    %% If RABBIT_LOGS is not set, we should ignore it.
+    DefaultFile = application:get_env(rabbit, lager_default_file, undefined),
+    %% If RABBIT_UPGRADE_LOGS is not set, we should ignore it.
+    UpgradeFile = application:get_env(rabbit, lager_upgrade_file, undefined),
+    case DefaultFile of
+        undefined -> ok;
+        false ->
+            set_env_default_log_disabled();
+        tty ->
+            set_env_default_log_console();
+        FileName when is_list(FileName) ->
+            case rabbit_prelaunch:get_context() of
+                %% The user explicitly sets $RABBITMQ_LOGS;
+                %% we should override a file location even
+                %% if it's set in rabbitmq.config
+                #{var_origins := #{main_log_file := environment}} ->
+                    set_env_default_log_file(FileName, override);
+                _ ->
+                    set_env_default_log_file(FileName, keep)
+            end
+    end,
+
+    %% Upgrade log file never overrides the value set in rabbitmq.config
+    case UpgradeFile of
+        %% No special env for upgrade logs - redirect to the default sink
+        undefined -> ok;
+        %% Redirect logs to default output.
+        %% DefaultFile is already bound: this clause matches only when the
+        %% upgrade file equals the default log file.
+        DefaultFile -> ok;
+        UpgradeFileName when is_list(UpgradeFileName) ->
+            set_env_upgrade_log_file(UpgradeFileName)
+    end.
+
+%% Replace the 'log' environment with an empty config, disabling every
+%% log output.
+set_env_default_log_disabled() ->
+    %% Disabling all the logs.
+    ok = application:set_env(rabbit, log, []).
+
+%% Force console output on and remove any file output from the 'log'
+%% environment (used when $RABBITMQ_LOGS is '-'/tty).
+set_env_default_log_console() ->
+    LogConfig = application:get_env(rabbit, log, []),
+    ConsoleConfig = proplists:get_value(console, LogConfig, []),
+    LogConfigConsole =
+        lists:keystore(console, 1, LogConfig,
+                       {console, lists:keystore(enabled, 1, ConsoleConfig,
+                                                {enabled, true})}),
+    %% Remove the file handler - disable logging to file
+    LogConfigConsoleNoFile = lists:keydelete(file, 1, LogConfigConsole),
+    ok = application:set_env(rabbit, log, LogConfigConsoleNoFile).
+
+%% Store FileName as the default log file in the 'log' environment.
+%% When a file is already configured (e.g. via rabbitmq.config), the
+%% Override flag decides: 'override' replaces it, 'keep' leaves it.
+set_env_default_log_file(FileName, Override) ->
+    LogConfig = application:get_env(rabbit, log, []),
+    FileConfig = proplists:get_value(file, LogConfig, []),
+    NewLogConfig = case proplists:get_value(file, FileConfig, undefined) of
+        undefined ->
+            lists:keystore(file, 1, LogConfig,
+                           {file, lists:keystore(file, 1, FileConfig,
+                                                 {file, FileName})});
+        _ConfiguredFileName ->
+            case Override of
+                override ->
+                    lists:keystore(
+                        file, 1, LogConfig,
+                        {file, lists:keystore(file, 1, FileConfig,
+                                              {file, FileName})});
+                keep ->
+                    LogConfig
+            end
+    end,
+    ok = application:set_env(rabbit, log, NewLogConfig).
+
+%% Store FileName as the 'upgrade' category's log file inside the
+%% 'categories' section of the 'log' environment — but only when no
+%% file is configured there already (rabbitmq.config always wins).
+set_env_upgrade_log_file(FileName) ->
+    LogConfig = application:get_env(rabbit, log, []),
+    SinksConfig = proplists:get_value(categories, LogConfig, []),
+    UpgradeSinkConfig = proplists:get_value(upgrade, SinksConfig, []),
+    FileConfig = proplists:get_value(file, SinksConfig, []),
+    NewLogConfig = case proplists:get_value(file, FileConfig, undefined) of
+        undefined ->
+            lists:keystore(
+                categories, 1, LogConfig,
+                {categories,
+                    lists:keystore(
+                        upgrade, 1, SinksConfig,
+                        {upgrade,
+                            lists:keystore(file, 1, UpgradeSinkConfig,
+                                           {file, FileName})})});
+        %% No change. We don't want to override the configured value.
+        _File -> LogConfig
+    end,
+    ok = application:set_env(rabbit, log, NewLogConfig).
+
+%% Build a lager sink spec for every name in SinkNames. A sink with no
+%% 'file' entry forwards everything to the default lager sink at the
+%% sink's (or the global default) level; a sink with its own file gets a
+%% dedicated file backend plus level-capped copies of the non-file
+%% default handlers. Each sink records its generated handlers under
+%% 'rabbit_handlers' so a later start can tell them apart from
+%% user-added ones.
+generate_lager_sinks(SinkNames, SinkConfigs) ->
+    LogLevels = case rabbit_prelaunch:get_context() of
+        #{log_levels := LL} -> LL;
+        _ -> undefined
+    end,
+    DefaultLogLevel = case LogLevels of
+        #{global := LogLevel} ->
+            LogLevel;
+        _ ->
+            default_config_value(level)
+    end,
+    lists:map(fun(SinkName) ->
+        SinkConfig = proplists:get_value(SinkName, SinkConfigs, []),
+        SinkHandlers = case proplists:get_value(file, SinkConfig, false) of
+            %% If no file defined - forward everything to the default backend
+            false ->
+                ForwarderLevel = proplists:get_value(level,
+                                                     SinkConfig,
+                                                     DefaultLogLevel),
+                [{lager_forwarder_backend,
+                    [lager_util:make_internal_sink_name(lager), ForwarderLevel]}];
+            %% If a file defined - add a file backend to handlers and remove all default file backends.
+            File ->
+                %% Use `debug` as a default handler to not override a handler level
+                Level = proplists:get_value(level, SinkConfig, DefaultLogLevel),
+                DefaultGeneratedHandlers = application:get_env(lager, rabbit_handlers, []),
+                SinkFileHandlers = case proplists:get_value(lager_file_backend, DefaultGeneratedHandlers, undefined) of
+                    undefined ->
+                        %% Create a new file handler.
+                        %% `info` is a default level here.
+                        FileLevel = proplists:get_value(level, SinkConfig, DefaultLogLevel),
+                        generate_lager_handlers([{file, [{file, File}, {level, FileLevel}]}]);
+                    FileHandler ->
+                        %% Replace a filename in the handler
+                        FileHandlerChanges = case handler_level_more_verbose(FileHandler, Level) of
+                            true  -> [{file, File}, {level, Level}];
+                            false -> [{file, File}]
+                        end,
+
+                        [{lager_file_backend,
+                            lists:ukeymerge(1, FileHandlerChanges,
+                                               lists:ukeysort(1, FileHandler))}]
+                end,
+                %% Remove all file handlers.
+                AllLagerHandlers = application:get_env(lager, handlers, []),
+                HandlersWithoutFile = lists:filter(
+                    fun({lager_file_backend, _}) -> false;
+                       ({_, _}) -> true
+                    end,
+                    AllLagerHandlers),
+                %% Set level for handlers which are more verbose.
+                %% We don't increase verbosity in sinks so it works like forwarder backend.
+                HandlersWithoutFileWithLevel = lists:map(fun({Name, Handler}) ->
+                    case handler_level_more_verbose(Handler, Level) of
+                        true  -> {Name, lists:keystore(level, 1, Handler, {level, Level})};
+                        false -> {Name, Handler}
+                    end
+                end,
+                HandlersWithoutFile),
+
+                HandlersWithoutFileWithLevel ++ SinkFileHandlers
+        end,
+        {SinkName, [{handlers, SinkHandlers}, {rabbit_handlers, SinkHandlers}]}
+    end,
+    SinkNames).
+
+%% True when the handler's configured level (default: the most verbose
+%% level found in the rabbit 'log' config) is more verbose than Level.
+handler_level_more_verbose(Handler, Level) ->
+    HandlerLevel = proplists:get_value(level, Handler, default_config_value(level)),
+    lager_util:level_to_num(HandlerLevel) > lager_util:level_to_num(Level).
+
+%% Merge already-configured sinks with freshly generated ones: for each
+%% existing sink, strip the handlers recorded under 'rabbit_handlers'
+%% (those were generated by a previous start), prepend the newly
+%% generated handlers, make sure their backends are set up, and record
+%% the generated set as the new 'rabbit_handlers'. Sinks only present in
+%% one of the two inputs are passed through unchanged.
+merge_lager_sink_handlers([{Name, Sink} | RestSinks], GeneratedSinks, Agg) ->
+    %% rabbitmq/rabbitmq-server#2044.
+    %% We have to take into account that a sink's
+    %% handler backend may need additional configuration here.
+    %% {rabbit_log_federation_lager_event, [
+    %%     {handlers, [
+    %%         {lager_forwarder_backend, [lager_event,inherit]},
+    %%         {syslog_lager_backend, [debug]}
+    %%     ]},
+    %%     {rabbit_handlers, [
+    %%         {lager_forwarder_backend, [lager_event,inherit]}
+    %%     ]}
+    %% ]}
+    case lists:keytake(Name, 1, GeneratedSinks) of
+        {value, {Name, GenSink}, RestGeneratedSinks} ->
+            Handlers = proplists:get_value(handlers, Sink, []),
+            GenHandlers = proplists:get_value(handlers, GenSink, []),
+            FormerRabbitHandlers = proplists:get_value(rabbit_handlers, Sink, []),
+
+            %% Remove handlers defined in previous starts
+            ConfiguredHandlers = remove_rabbit_handlers(Handlers, FormerRabbitHandlers),
+            NewHandlers = GenHandlers ++ ConfiguredHandlers,
+            ok = maybe_configure_handler_backends(NewHandlers),
+            MergedSink = lists:keystore(rabbit_handlers, 1,
+                                        lists:keystore(handlers, 1, Sink,
+                                                       {handlers, NewHandlers}),
+                                        {rabbit_handlers, GenHandlers}),
+            merge_lager_sink_handlers(
+                RestSinks,
+                RestGeneratedSinks,
+                [{Name, MergedSink} | Agg]);
+        false ->
+            merge_lager_sink_handlers(
+                RestSinks,
+                GeneratedSinks,
+                [{Name, Sink} | Agg])
+    end;
+merge_lager_sink_handlers([], GeneratedSinks, Agg) -> GeneratedSinks ++ Agg.
+
+%% Run backend-specific setup (currently: starting the syslog app) for
+%% every handler in the list.
+maybe_configure_handler_backends([]) ->
+    ok;
+maybe_configure_handler_backends([{Backend, _}|Backends]) ->
+    ok = configure_handler_backend(Backend),
+    maybe_configure_handler_backends(Backends).
+
+%% Delegate to the prelaunch module's list of sinks every category is
+%% expected to have.
+list_expected_sinks() ->
+    rabbit_prelaunch_early_logging:list_expected_sinks().
+
+%% Remove the OTP 'logger' default handler so it does not duplicate
+%% lager's output. Called via apply/3 so the code also loads on OTP
+%% releases without the logger module; both "module missing" and
+%% "handler already gone" (as happens under CLI commands) are fine.
+maybe_remove_logger_handler() ->
+    M = logger,
+    F = remove_handler,
+    try
+        ok = erlang:apply(M, F, [default])
+    catch
+        error:undef ->
+            % OK since the logger module only exists in OTP 21.1 or later
+            ok;
+        error:{badmatch, {error, {not_found, default}}} ->
+            % OK - this error happens when running a CLI command
+            ok;
+        Err:Reason ->
+            error_logger:error_msg("calling ~p:~p failed: ~p:~p~n",
+                                   [M, F, Err, Reason])
+    end.
+
+%% Return the most verbose level configured across the default lager
+%% handlers and all extra sinks' handlers. The {ok, _} matches assert
+%% that both lager env keys are set; this is called only after lager has
+%% been configured, so a crash here indicates a boot-order bug.
+get_most_verbose_log_level() ->
+    {ok, HandlersA} = application:get_env(lager, handlers),
+    {ok, ExtraSinks} = application:get_env(lager, extra_sinks),
+    HandlersB = lists:append(
+                  [H || {_, Keys} <- ExtraSinks,
+                        {handlers, H} <- Keys]),
+    get_most_verbose_log_level(HandlersA ++ HandlersB,
+                               lager_util:level_to_num(none)).
+
+%% Fold over handler proplists keeping the numerically highest (= most
+%% verbose) level; a handler without a 'level' entry counts as 'info'.
+get_most_verbose_log_level([{_, Props} | Rest], MostVerbose) ->
+    LogLevel = proplists:get_value(level, Props, info),
+    LogLevelNum = lager_util:level_to_num(LogLevel),
+    case LogLevelNum > MostVerbose of
+        true ->
+            get_most_verbose_log_level(Rest, LogLevelNum);
+        false ->
+            get_most_verbose_log_level(Rest, MostVerbose)
+    end;
+get_most_verbose_log_level([], MostVerbose) ->
+    lager_util:num_to_level(MostVerbose).
diff --git a/deps/rabbit/src/rabbit_limiter.erl b/deps/rabbit/src/rabbit_limiter.erl
new file mode 100644
index 0000000000..d3803957d3
--- /dev/null
+++ b/deps/rabbit/src/rabbit_limiter.erl
@@ -0,0 +1,448 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% The purpose of the limiter is to stem the flow of messages from
+%% queues to channels, in order to act upon various protocol-level
+%% flow control mechanisms, specifically AMQP 0-9-1's basic.qos
+%% prefetch_count, our consumer prefetch extension, and AMQP 1.0's
+%% link (aka consumer) credit mechanism.
+%%
+%% Each channel has an associated limiter process, created with
+%% start_link/1, which it passes to queues on consumer creation with
+%% rabbit_amqqueue:basic_consume/10, and rabbit_amqqueue:basic_get/4.
+%% The latter isn't strictly necessary, since basic.get is not
+%% subject to limiting, but it means that whenever a queue knows about
+%% a channel, it also knows about its limiter, which is less fiddly.
+%%
+%% The limiter process holds state that is, in effect, shared between
+%% the channel and all queues from which the channel is
+%% consuming. Essentially all these queues are competing for access to
+%% a single, limited resource - the ability to deliver messages via
+%% the channel - and it is the job of the limiter process to mediate
+%% that access.
+%%
+%% The limiter process is separate from the channel process for two
+%% reasons: separation of concerns, and efficiency. Channels can get
+%% very busy, particularly if they are also dealing with publishes.
+%% With a separate limiter process all the aforementioned access
+%% mediation can take place without touching the channel.
+%%
+%% For efficiency, both the channel and the queues keep some local
+%% state, initialised from the limiter pid with new/1 and client/1,
+%% respectively. In particular this allows them to avoid any
+%% interaction with the limiter process when it is 'inactive', i.e. no
+%% protocol-level flow control is taking place.
+%%
+%% This optimisation does come at the cost of some complexity though:
+%% when a limiter becomes active, the channel needs to inform all its
+%% consumer queues of this change in status. It does this by invoking
+%% rabbit_amqqueue:activate_limit_all/2. Note that there is no inverse
+%% transition, i.e. once a queue has been told about an active
+%% limiter, it is not subsequently told when that limiter becomes
+%% inactive. In practice it is rare for that to happen, though we
+%% could optimise this case in the future.
+%%
+%% Consumer credit (for AMQP 1.0) and per-consumer prefetch (for AMQP
+%% 0-9-1) are treated as essentially the same thing, but with the
+%% exception that per-consumer prefetch gets an auto-topup when
+%% acknowledgments come in.
+%%
+%% The bookkeeping for this is local to queues, so it is not necessary
+%% to store information about it in the limiter process. But for
+%% abstraction we hide it from the queue behind the limiter API, and
+%% it therefore becomes part of the queue local state.
+%%
+%% The interactions with the limiter are as follows:
+%%
+%% 1. Channels tell the limiter about basic.qos prefetch counts -
+%% that's what the limit_prefetch/3, unlimit_prefetch/1,
+%% get_prefetch_limit/1 API functions are about. They also tell the
+%% limiter queue state (via the queue) about consumer credit
+%% changes and message acknowledgement - that's what credit/5 and
+%% ack_from_queue/3 are for.
+%%
+%% 2. Queues also tell the limiter queue state about the queue
+%% becoming empty (via drained/1) and consumers leaving (via
+%% forget_consumer/2).
+%%
+%% 3. Queues register with the limiter - this happens as part of
+%% activate/1.
+%%
+%% 4. The limiter process maintains an internal counter of 'messages
+%% sent but not yet acknowledged', called the 'volume'.
+%%
+%% 5. Queues ask the limiter for permission (with can_send/3) whenever
+%% they want to deliver a message to a channel. The limiter checks
+%% whether a) the volume has not yet reached the prefetch limit,
+%% and b) whether the consumer has enough credit. If so it
+%% increments the volume and tells the queue to proceed. Otherwise
+%% it marks the queue as requiring notification (see below) and
+%% tells the queue not to proceed.
+%%
+%% 6. A queue that has been told to proceed (by the return value of
+%% can_send/3) sends the message to the channel. Conversely, a
+%% queue that has been told not to proceed, will not attempt to
+%% deliver that message, or any future messages, to the
+%% channel. This is accomplished by can_send/3 capturing the
+%% outcome in the local state, where it can be accessed with
+%% is_suspended/1.
+%%
+%% 7. When a channel receives an ack it tells the limiter (via ack/2)
+%% how many messages were ack'ed. The limiter process decrements
+%% the volume and if it falls below the prefetch_count then it
+%% notifies (through rabbit_amqqueue:resume/2) all the queues
+%% requiring notification, i.e. all those that had a can_send/3
+%% request denied.
+%%
+%% 8. Upon receipt of such a notification, queues resume delivery to
+%% the channel, i.e. they will once again start asking limiter, as
+%% described in (5).
+%%
+%% 9. When a queue has no more consumers associated with a particular
+%% channel, it deactivates use of the limiter with deactivate/1,
+%% which alters the local state such that no further interactions
+%% with the limiter process take place until a subsequent
+%% activate/1.
+
+-module(rabbit_limiter).
+
+-include("rabbit.hrl").
+
+-behaviour(gen_server2).
+
+-export([start_link/1]).
+%% channel API
+-export([new/1, limit_prefetch/3, unlimit_prefetch/1, is_active/1,
+ get_prefetch_limit/1, ack/2, pid/1]).
+%% queue API
+-export([client/1, activate/1, can_send/3, resume/1, deactivate/1,
+ is_suspended/1, is_consumer_blocked/2, credit/5, ack_from_queue/3,
+ drained/1, forget_consumer/2]).
+%% callbacks
+-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2,
+ handle_info/2, prioritise_call/4]).
+
+%%----------------------------------------------------------------------------
+
+-record(lstate, {pid, prefetch_limited}).
+-record(qstate, {pid, state, credits}).
+
+-type lstate() :: #lstate{pid :: pid(),
+ prefetch_limited :: boolean()}.
+-type qstate() :: #qstate{pid :: pid(),
+ state :: 'dormant' | 'active' | 'suspended'}.
+
+-type credit_mode() :: 'manual' | 'drain' | 'auto'.
+
+%%----------------------------------------------------------------------------
+
+-record(lim, {prefetch_count = 0,
+ ch_pid,
+ %% 'Notify' is a boolean that indicates whether a queue should be
+ %% notified of a change in the limit or volume that may allow it to
+ %% deliver more messages via the limiter's channel.
+ queues = maps:new(), % QPid -> {MonitorRef, Notify}
+ volume = 0}).
+
+%% mode is of type credit_mode()
+-record(credit, {credit = 0, mode}).
+
+%%----------------------------------------------------------------------------
+%% API
+%%----------------------------------------------------------------------------
+
+-spec start_link(rabbit_types:proc_name()) ->
+                        rabbit_types:ok_pid_or_error().
+
+%% Start a limiter process; ProcName is only used for process naming /
+%% debugging via ?store_proc_name in init/1.
+start_link(ProcName) -> gen_server2:start_link(?MODULE, [ProcName], []).
+
+-spec new(pid()) -> lstate().
+
+%% Create the channel-side local state for an existing limiter process.
+new(Pid) ->
+    %% this is a 'call' to ensure that it is invoked at most once.
+    ok = gen_server:call(Pid, {new, self()}, infinity),
+    #lstate{pid = Pid, prefetch_limited = false}.
+
+-spec limit_prefetch(lstate(), non_neg_integer(), non_neg_integer()) ->
+          lstate().
+
+%% Set a basic.qos prefetch limit; UnackedCount seeds the volume so
+%% messages delivered before the qos took effect are counted.
+limit_prefetch(L, PrefetchCount, UnackedCount) when PrefetchCount > 0 ->
+    ok = gen_server:call(
+           L#lstate.pid,
+           {limit_prefetch, PrefetchCount, UnackedCount}, infinity),
+    L#lstate{prefetch_limited = true}.
+
+-spec unlimit_prefetch(lstate()) -> lstate().
+
+%% Remove the prefetch limit (prefetch_count = 0 means unlimited).
+unlimit_prefetch(L) ->
+    ok = gen_server:call(L#lstate.pid, unlimit_prefetch, infinity),
+    L#lstate{prefetch_limited = false}.
+
+-spec is_active(lstate()) -> boolean().
+
+%% Whether a prefetch limit is currently in force (local read, no call).
+is_active(#lstate{prefetch_limited = Limited}) -> Limited.
+
+-spec get_prefetch_limit(lstate()) -> non_neg_integer().
+
+%% Current prefetch limit; 0 when unlimited. Only calls the process when
+%% a limit is active.
+get_prefetch_limit(#lstate{prefetch_limited = false}) -> 0;
+get_prefetch_limit(L) ->
+    gen_server:call(L#lstate.pid, get_prefetch_limit, infinity).
+
+-spec ack(lstate(), non_neg_integer()) -> 'ok'.
+
+%% Tell the limiter AckCount messages were acknowledged (async); a no-op
+%% when no prefetch limit is active.
+ack(#lstate{prefetch_limited = false}, _AckCount) -> ok;
+ack(L, AckCount) -> gen_server:cast(L#lstate.pid, {ack, AckCount}).
+
+-spec pid(lstate()) -> pid().
+
+%% The limiter process behind this channel-side state.
+pid(#lstate{pid = Pid}) -> Pid.
+
+-spec client(pid()) -> qstate().
+
+%% Create the queue-side local state; starts 'dormant', i.e. no
+%% interaction with the limiter process until activate/1.
+client(Pid) -> #qstate{pid = Pid, state = dormant, credits = gb_trees:empty()}.
+
+-spec activate(qstate()) -> qstate().
+
+%% Register this queue with the limiter (idempotent: already-active or
+%% suspended states pass through unchanged).
+activate(L = #qstate{state = dormant}) ->
+    ok = gen_server:cast(L#qstate.pid, {register, self()}),
+    L#qstate{state = active};
+activate(L) -> L.
+
+-spec can_send(qstate(), boolean(), rabbit_types:ctag()) ->
+          {'continue' | 'suspend', qstate()}.
+
+%% Ask permission to deliver one message for consumer CTag. Blocked
+%% consumers suspend immediately; otherwise the limiter process is
+%% consulted only when this queue is 'active' (a dead limiter counts as
+%% permission granted, via safe_call's ExitValue). On 'continue' one unit
+%% of the consumer's credit is consumed locally.
+can_send(L = #qstate{pid = Pid, state = State, credits = Credits},
+         AckRequired, CTag) ->
+    case is_consumer_blocked(L, CTag) of
+        false -> case (State =/= active orelse
+                       safe_call(Pid, {can_send, self(), AckRequired}, true)) of
+                     true  -> Credits1 = decrement_credit(CTag, Credits),
+                              {continue, L#qstate{credits = Credits1}};
+                     false -> {suspend, L#qstate{state = suspended}}
+                 end;
+        true  -> {suspend, L}
+    end.
+
+%% Call the limiter, returning ExitValue instead of crashing if the
+%% limiter process has exited.
+safe_call(Pid, Msg, ExitValue) ->
+    rabbit_misc:with_exit_handler(
+      fun () -> ExitValue end,
+      fun () -> gen_server2:call(Pid, Msg, infinity) end).
+
+-spec resume(qstate()) -> qstate().
+
+%% Leave the 'suspended' state after the limiter notified us; any other
+%% state passes through unchanged.
+resume(L = #qstate{state = suspended}) ->
+    L#qstate{state = active};
+resume(L) -> L.
+
+-spec deactivate(qstate()) -> qstate().
+
+%% Unregister from the limiter and go back to 'dormant' (no-op when
+%% already dormant).
+deactivate(L = #qstate{state = dormant}) -> L;
+deactivate(L) ->
+    ok = gen_server:cast(L#qstate.pid, {unregister, self()}),
+    L#qstate{state = dormant}.
+
+-spec is_suspended(qstate()) -> boolean().
+
+%% True when a previous can_send/3 was denied and we have not been
+%% resumed since.
+is_suspended(#qstate{state = suspended}) -> true;
+is_suspended(#qstate{})                  -> false.
+
+-spec is_consumer_blocked(qstate(), rabbit_types:ctag()) -> boolean().
+
+%% A consumer is blocked when it has a credit record with zero (or
+%% negative) credit; consumers with no record are uncredited and never
+%% blocked.
+is_consumer_blocked(#qstate{credits = Credits}, CTag) ->
+    case gb_trees:lookup(CTag, Credits) of
+        none                                    -> false;
+        {value, #credit{credit = C}} when C > 0 -> false;
+        {value, #credit{}}                      -> true
+    end.
+
+-spec credit
+        (qstate(), rabbit_types:ctag(), non_neg_integer(), credit_mode(),
+         boolean()) ->
+            {boolean(), qstate()}.
+
+%% Set consumer credit for CTag. When the queue is empty and the mode is
+%% 'drain', the credit is consumed immediately (set to 0, mode 'manual')
+%% and 'true' is returned to signal that a drained event is due now.
+credit(Limiter = #qstate{credits = Credits}, CTag, Crd, Mode, IsEmpty) ->
+    {Res, Cr} =
+        case IsEmpty andalso Mode =:= drain of
+            true  -> {true,  #credit{credit = 0,   mode = manual}};
+            false -> {false, #credit{credit = Crd, mode = Mode}}
+        end,
+    {Res, Limiter#qstate{credits = enter_credit(CTag, Cr, Credits)}}.
+
+-spec ack_from_queue(qstate(), rabbit_types:ctag(), non_neg_integer()) ->
+          {boolean(), qstate()}.
+
+%% Auto-topup for per-consumer prefetch: in 'auto' mode each ack returns
+%% credit to the consumer. The boolean is true when the consumer just
+%% went from 0 to positive credit, i.e. it became unblocked.
+ack_from_queue(Limiter = #qstate{credits = Credits}, CTag, Credit) ->
+    {Credits1, Unblocked} =
+        case gb_trees:lookup(CTag, Credits) of
+            {value, C = #credit{mode = auto, credit = C0}} ->
+                {update_credit(CTag, C#credit{credit = C0 + Credit}, Credits),
+                 C0 =:= 0 andalso Credit =/= 0};
+            _ ->
+                {Credits, false}
+        end,
+    {Unblocked, Limiter#qstate{credits = Credits1}}.
+
+-spec drained(qstate()) ->
+          {[{rabbit_types:ctag(), non_neg_integer()}], qstate()}.
+
+%% The queue has gone empty: zero out every consumer in 'drain' mode
+%% (switching it to 'manual') and report the credit each one had, so the
+%% channel can emit drained notifications.
+drained(Limiter = #qstate{credits = Credits}) ->
+    Drain = fun(C) -> C#credit{credit = 0, mode = manual} end,
+    {CTagCredits, Credits2} =
+        rabbit_misc:gb_trees_fold(
+          fun (CTag, C = #credit{credit = Crd, mode = drain}, {Acc, Creds0}) ->
+                  {[{CTag, Crd} | Acc], update_credit(CTag, Drain(C), Creds0)};
+              (_CTag,  #credit{credit = _Crd, mode = _Mode}, {Acc, Creds0}) ->
+                  {Acc, Creds0}
+          end, {[], Credits}, Credits),
+    {CTagCredits, Limiter#qstate{credits = Credits2}}.
+
+-spec forget_consumer(qstate(), rabbit_types:ctag()) -> qstate().
+
+%% Drop CTag's credit record (if any) when the consumer goes away.
+forget_consumer(Limiter = #qstate{credits = Credits}, CTag) ->
+    Limiter#qstate{credits = gb_trees:delete_any(CTag, Credits)}.
+
+%%----------------------------------------------------------------------------
+%% Queue-local code
+%%----------------------------------------------------------------------------
+
+%% We want to do all the AMQP 1.0-ish link level credit calculations
+%% in the queue (to do them elsewhere introduces a ton of
+%% races). However, it's a big chunk of code that is conceptually very
+%% linked to the limiter concept. So we get the queue to hold a bit of
+%% state for us (#qstate.credits), and maintain a fiction that the
+%% limiter is making the decisions...
+
+%% Consume one unit of CTag's credit; consumers without a credit record
+%% are unlimited and left untouched.
+decrement_credit(CTag, Credits) ->
+    case gb_trees:lookup(CTag, Credits) of
+        {value, C = #credit{credit = Credit}} ->
+            update_credit(CTag, C#credit{credit = Credit - 1}, Credits);
+        none ->
+            Credits
+    end.
+
+%% Insert-or-replace a credit record, enforcing the drain invariant.
+enter_credit(CTag, C, Credits) ->
+    gb_trees:enter(CTag, ensure_credit_invariant(C), Credits).
+
+%% Replace an existing credit record (crashes if absent), enforcing the
+%% drain invariant.
+update_credit(CTag, C, Credits) ->
+    gb_trees:update(CTag, ensure_credit_invariant(C), Credits).
+
+ensure_credit_invariant(C = #credit{credit = 0, mode = drain}) ->
+    %% Using up all credit implies no need to send a 'drained' event
+    C#credit{mode = manual};
+ensure_credit_invariant(C) ->
+    C.
+
+%%----------------------------------------------------------------------------
+%% gen_server callbacks
+%%----------------------------------------------------------------------------
+
+%% gen_server2 init: record the process name for diagnostics and start
+%% with an empty #lim{} (no channel, no queues, no limit).
+init([ProcName]) -> ?store_proc_name(ProcName),
+                    ?LG_PROCESS_TYPE(limiter),
+                    {ok, #lim{}}.
+
+%% Answer get_prefetch_limit ahead of queued work so channels reading
+%% the limit are not delayed behind can_send traffic.
+prioritise_call(get_prefetch_limit, _From, _Len, _State) -> 9;
+prioritise_call(_Msg, _From, _Len, _State)               -> 0.
+
+%% {new, ChPid} only matches an unset ch_pid — a second 'new' crashes,
+%% enforcing the "invoked at most once" contract of new/1.
+handle_call({new, ChPid}, _From, State = #lim{ch_pid = undefined}) ->
+    {reply, ok, State#lim{ch_pid = ChPid}};
+
+%% First limit: seed the volume with the channel's unacked count.
+handle_call({limit_prefetch, PrefetchCount, UnackedCount}, _From,
+            State = #lim{prefetch_count = 0}) ->
+    {reply, ok, maybe_notify(State, State#lim{prefetch_count = PrefetchCount,
+                                              volume         = UnackedCount})};
+%% Subsequent limit change: keep the current volume.
+handle_call({limit_prefetch, PrefetchCount, _UnackedCount}, _From, State) ->
+    {reply, ok, maybe_notify(State, State#lim{prefetch_count = PrefetchCount})};
+
+handle_call(unlimit_prefetch, _From, State) ->
+    {reply, ok, maybe_notify(State, State#lim{prefetch_count = 0,
+                                              volume         = 0})};
+
+handle_call(get_prefetch_limit, _From,
+            State = #lim{prefetch_count = PrefetchCount}) ->
+    {reply, PrefetchCount, State};
+
+%% Grant or deny delivery: deny (and mark the queue for later notify)
+%% when the limit is reached; otherwise grant, counting the message
+%% towards the volume only when an ack is expected back.
+handle_call({can_send, QPid, AckRequired}, _From,
+            State = #lim{volume = Volume}) ->
+    case prefetch_limit_reached(State) of
+        true  -> {reply, false, limit_queue(QPid, State)};
+        false -> {reply, true,  State#lim{volume = if AckRequired -> Volume + 1;
+                                                      true        -> Volume
+                                                   end}}
+    end.
+
+%% Acks shrink the volume; the Volume == 0 clamp guards against acks
+%% arriving after the volume was reset by unlimit_prefetch.
+handle_cast({ack, Count}, State = #lim{volume = Volume}) ->
+    NewVolume = if Volume == 0 -> 0;
+                   true        -> Volume - Count
+                end,
+    {noreply, maybe_notify(State, State#lim{volume = NewVolume})};
+
+handle_cast({register, QPid}, State) ->
+    {noreply, remember_queue(QPid, State)};
+
+handle_cast({unregister, QPid}, State) ->
+    {noreply, forget_queue(QPid, State)}.
+
+%% A monitored queue died: forget it (same as an explicit unregister).
+handle_info({'DOWN', _MonitorRef, _Type, QPid, _Info}, State) ->
+    {noreply, forget_queue(QPid, State)}.
+
+terminate(_, _) ->
+    ok.
+
+code_change(_, State, _) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+%% Internal plumbing
+%%----------------------------------------------------------------------------
+
+%% Notify waiting queues exactly on the transition from "limit reached"
+%% to "limit not reached"; otherwise return NewState unchanged.
+maybe_notify(OldState, NewState) ->
+    case prefetch_limit_reached(OldState) andalso
+        not prefetch_limit_reached(NewState) of
+        true  -> notify_queues(NewState);
+        false -> NewState
+    end.
+
+%% A prefetch_count of 0 means unlimited.
+prefetch_limit_reached(#lim{prefetch_count = Limit, volume = Volume}) ->
+    Limit =/= 0 andalso Volume >= Limit.
+
+%% Track a queue: monitor it and store {MonitorRef, Notify=false}.
+%% Idempotent for already-known queues.
+remember_queue(QPid, State = #lim{queues = Queues}) ->
+    case maps:is_key(QPid, Queues) of
+        false -> MRef = erlang:monitor(process, QPid),
+                 State#lim{queues = maps:put(QPid, {MRef, false}, Queues)};
+        true  -> State
+    end.
+
+%% Stop tracking a queue, demonitoring it; unknown queues are ignored.
+forget_queue(QPid, State = #lim{queues = Queues}) ->
+    case maps:find(QPid, Queues) of
+        {ok, {MRef, _}} -> true = erlang:demonitor(MRef),
+                           State#lim{queues = maps:remove(QPid, Queues)};
+        error           -> State
+    end.
+
+%% Flag a queue as requiring notification when capacity frees up (set
+%% after denying its can_send request).
+limit_queue(QPid, State = #lim{queues = Queues}) ->
+    UpdateFun = fun ({MRef, _}) -> {MRef, true} end,
+    State#lim{queues = maps:update_with(QPid, UpdateFun, Queues)}.
+
+%% Resume every queue flagged for notification and clear the flags.
+notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) ->
+    {QList, NewQueues} =
+        maps:fold(fun (_QPid, {_, false}, Acc) -> Acc;
+                      (QPid, {MRef, true}, {L, D}) ->
+                          {[QPid | L], maps:put(QPid, {MRef, false}, D)}
+                  end, {[], Queues}, Queues),
+    case length(QList) of
+        0 -> ok;
+        1 -> ok = rabbit_amqqueue:resume(hd(QList), ChPid); %% common case
+        L ->
+            %% We randomly vary the position of queues in the list,
+            %% thus ensuring that each queue has an equal chance of
+            %% being notified first.
+            {L1, L2} = lists:split(rand:uniform(L), QList),
+            [[ok = rabbit_amqqueue:resume(Q, ChPid) || Q <- L3]
+                || L3 <- [L2, L1]],
+            ok
+    end,
+    State#lim{queues = NewQueues}.
diff --git a/deps/rabbit/src/rabbit_log_tail.erl b/deps/rabbit/src/rabbit_log_tail.erl
new file mode 100644
index 0000000000..c3faad07fc
--- /dev/null
+++ b/deps/rabbit/src/rabbit_log_tail.erl
@@ -0,0 +1,102 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_log_tail).
+
+-export([tail_n_lines/2]).
+-export([init_tail_stream/4]).
+
+-define(GUESS_OFFSET, 200).
+
+%% Start a reader process that streams new data appended to Filename to
+%% Pid as {Ref, Data, ...} messages, starting from the current EOF, for
+%% at most Duration seconds (or 'infinity'). The reader is linked to Pid
+%% so it dies with the consumer. Returns {ok, Ref} once the file is
+%% open, or {error, _} (including 'timeout' after 5s, in which case the
+%% reader is killed).
+init_tail_stream(Filename, Pid, Ref, Duration) ->
+    RPCProc = self(),
+    Reader = spawn(fun() ->
+        link(Pid),
+        case file:open(Filename, [read, binary]) of
+            {ok, File} ->
+                TimeLimit = case Duration of
+                    infinity -> infinity;
+                    _ -> erlang:system_time(second) + Duration
+                end,
+                {ok, _} = file:position(File, eof),
+                RPCProc ! {Ref, opened},
+                read_loop(File, Pid, Ref, TimeLimit);
+            {error, _} = Err ->
+                RPCProc ! {Ref, Err}
+        end
+    end),
+    receive
+        {Ref, opened} -> {ok, Ref};
+        {Ref, {error, Err}} -> {error, Err}
+    after 5000 ->
+        exit(Reader, timeout),
+        {error, timeout}
+    end.
+
+%% Poll the file for new data every second, forwarding chunks of up to
+%% ?GUESS_OFFSET bytes to Pid until TimeLimit (seconds since epoch, or
+%% 'infinity') passes or a read error occurs; the final message is
+%% tagged 'finished'. NOTE(review): the atom 'confinue' looks like a
+%% typo for 'continue', but it is part of the message protocol with the
+%% receiving process — renaming it requires changing the consumers of
+%% these messages in lockstep, so it is left as-is here.
+read_loop(File, Pid, Ref, TimeLimit) ->
+    case is_integer(TimeLimit) andalso erlang:system_time(second) > TimeLimit of
+        true -> Pid ! {Ref, <<>>, finished};
+        false ->
+            case file:read(File, ?GUESS_OFFSET) of
+                {ok, Data} ->
+                    Pid ! {Ref, Data, confinue},
+                    read_loop(File, Pid, Ref, TimeLimit);
+                eof ->
+                    timer:sleep(1000),
+                    read_loop(File, Pid, Ref, TimeLimit);
+                {error, _} = Err ->
+                    Pid ! {Ref, Err, finished}
+            end
+    end.
+
+%% Return the last N lines of Filename as a list of binaries, or
+%% {error, Reason} if the file cannot be opened. The EOF position is
+%% sampled once so concurrent appends do not affect the result.
+tail_n_lines(Filename, N) ->
+    case file:open(Filename, [read, binary]) of
+        {ok, File} ->
+            {ok, Eof} = file:position(File, eof),
+            %% Eof may move. Only read up to the current one.
+            Result = reverse_read_n_lines(N, N, File, Eof, Eof),
+            file:close(File),
+            Result;
+        {error, _} = Error -> Error
+    end.
+
+%% Read backwards from Position in steps of OffsetN * ?GUESS_OFFSET
+%% bytes until at least N full lines are available (or the start of the
+%% file is reached), then return the last N of them. OffsetN grows on
+%% each retry by the number of lines still missing.
+reverse_read_n_lines(N, OffsetN, File, Position, Eof) ->
+    GuessPosition = offset(Position, OffsetN),
+    case read_lines_from_position(File, GuessPosition, Eof) of
+        {ok, Lines} ->
+            NLines = length(Lines),
+            case {NLines >= N, GuessPosition == 0} of
+                %% Take only N lines if there is more
+                {true, _} -> lists:nthtail(NLines - N, Lines);
+                %% Safe to assume that NLines is less then N
+                {_, true} -> Lines;
+                %% Adjust position
+                _ ->
+                    reverse_read_n_lines(N, N - NLines + 1, File, GuessPosition, Eof)
+            end;
+        {error, _} = Error -> Error
+    end.
+
+%% Read everything between GuessPosition and the sampled Eof (clamped to
+%% a non-negative length).
+read_from_position(File, GuessPosition, Eof) ->
+    file:pread(File, GuessPosition, max(0, Eof - GuessPosition)).
+
+%% Read from GuessPosition to Eof and split into lines. Unless reading
+%% from position 0, the first line is discarded because the guessed
+%% position may have landed mid-line.
+read_lines_from_position(File, GuessPosition, Eof) ->
+    case read_from_position(File, GuessPosition, Eof) of
+        {ok, Data} ->
+            Lines = binary:split(Data, <<"\n">>, [global, trim]),
+            case {GuessPosition, Lines} of
+                %% If position is 0 - there are no partial lines
+                {0, _} -> {ok, Lines};
+                %% Remove first line as it can be partial
+                {_, [_ | Rest]} -> {ok, Rest};
+                {_, []} -> {ok, []}
+            end;
+        {error, _} = Error -> Error
+    end.
+
+%% Step N chunks of ?GUESS_OFFSET bytes back from Base, floored at 0.
+offset(Base, N) ->
+    max(0, Base - N * ?GUESS_OFFSET).
diff --git a/deps/rabbit/src/rabbit_looking_glass.erl b/deps/rabbit/src/rabbit_looking_glass.erl
new file mode 100644
index 0000000000..00b1b6d46b
--- /dev/null
+++ b/deps/rabbit/src/rabbit_looking_glass.erl
@@ -0,0 +1,48 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_looking_glass).
+
+-ignore_xref([{lg, trace, 4}]).
+-ignore_xref([{maps, from_list, 1}]).
+
+-export([boot/0]).
+-export([connections/0]).
+
+%% Called at node boot. When the RABBITMQ_TRACER environment variable is
+%% set, starts the Looking Glass application and begins tracing the
+%% configured "Module:Function" callbacks to "traces.lz4".
+boot() ->
+    case os:getenv("RABBITMQ_TRACER") of
+        false ->
+            ok;
+        Value ->
+            Input = parse_value(Value),
+            rabbit_log:info(
+                "Enabling Looking Glass profiler, input value: ~p",
+                [Input]
+            ),
+            %% looking_glass must be running before lg:trace/4 is called
+            {ok, _} = application:ensure_all_started(looking_glass),
+            lg:trace(
+                Input,
+                lg_file_tracer,
+                "traces.lz4",
+                maps:from_list([
+                    {mode, profile},
+                    {process_dump, true},
+                    {running, true},
+                    {send, true}]
+                )
+            )
+    end.
+
+%% Parses a comma-separated list of "Module:Function" pairs into
+%% {callback, Module, Function} tuples understood by lg:trace/4.
+%% NOTE(review): list_to_atom/1 creates atoms at runtime; acceptable here
+%% because RABBITMQ_TRACER is operator-controlled, not client input.
+parse_value(Value) ->
+    MakeCallback =
+        fun(Spec) ->
+            [ModStr, FunStr] = string:tokens(Spec, ":"),
+            {callback, list_to_atom(ModStr), list_to_atom(FunStr)}
+        end,
+    lists:map(MakeCallback, string:tokens(Value, ",")).
+
+%% Looking Glass input spec covering the processes of all ranch
+%% connection supervisors, used to trace client connections.
+%% NOTE(review): this reads ranch's internal 'ranch_server' ets table
+%% directly - verify it stays compatible with the ranch version in use.
+connections() ->
+    Pids = [Pid || {{conns_sup, _}, Pid} <- ets:tab2list(ranch_server)],
+    ['_', {scope, Pids}].
diff --git a/deps/rabbit/src/rabbit_maintenance.erl b/deps/rabbit/src/rabbit_maintenance.erl
new file mode 100644
index 0000000000..e5434dc888
--- /dev/null
+++ b/deps/rabbit/src/rabbit_maintenance.erl
@@ -0,0 +1,354 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_maintenance).
+
+-include("rabbit.hrl").
+
+-export([
+ is_enabled/0,
+ drain/0,
+ revive/0,
+ mark_as_being_drained/0,
+ unmark_as_being_drained/0,
+ is_being_drained_local_read/1,
+ is_being_drained_consistent_read/1,
+ status_local_read/1,
+ status_consistent_read/1,
+ filter_out_drained_nodes_local_read/1,
+ filter_out_drained_nodes_consistent_read/1,
+ suspend_all_client_listeners/0,
+ resume_all_client_listeners/0,
+ close_all_client_connections/0,
+ primary_replica_transfer_candidate_nodes/0,
+ random_primary_replica_transfer_candidate_node/1,
+ transfer_leadership_of_quorum_queues/1,
+ transfer_leadership_of_classic_mirrored_queues/1,
+ status_table_name/0,
+ status_table_definition/0
+]).
+
+-define(TABLE, rabbit_node_maintenance_states).
+-define(FEATURE_FLAG, maintenance_mode_status).
+-define(DEFAULT_STATUS, regular).
+-define(DRAINING_STATUS, draining).
+
+-type maintenance_status() :: ?DEFAULT_STATUS | ?DRAINING_STATUS.
+-type mnesia_table() :: atom().
+
+-export_type([
+ maintenance_status/0
+]).
+
+%%
+%% API
+%%
+
+-spec status_table_name() -> mnesia_table().
+%% Name of the mnesia table that tracks per-node maintenance status.
+status_table_name() ->
+    ?TABLE.
+
+-spec status_table_definition() -> list().
+%% Table definition in the proplist form expected by mnesia table
+%% creation (record name and field list of #node_maintenance_state{}).
+status_table_definition() ->
+    maps:to_list(#{
+        record_name => node_maintenance_state,
+        attributes  => record_info(fields, node_maintenance_state)
+    }).
+
+-spec is_enabled() -> boolean().
+%% True when the maintenance mode status feature flag is enabled.
+is_enabled() ->
+    rabbit_feature_flags:is_enabled(?FEATURE_FLAG).
+
+-spec drain() -> ok.
+%% Puts this node into maintenance (drain) mode; a no-op with a warning
+%% when the feature flag is not enabled cluster-wide.
+drain() ->
+    case is_enabled() of
+        true  -> do_drain();
+        false -> rabbit_log:warning("Feature flag `~s` is not enabled, draining is a no-op", [?FEATURE_FLAG])
+    end.
+
+-spec do_drain() -> ok.
+%% Drain sequence: mark the node as draining, stop accepting and close
+%% client connections, then move primary queue replicas off this node so
+%% it can be shut down safely.
+do_drain() ->
+    rabbit_log:alert("This node is being put into maintenance (drain) mode"),
+    mark_as_being_drained(),
+    rabbit_log:info("Marked this node as undergoing maintenance"),
+    suspend_all_client_listeners(),
+    rabbit_log:alert("Suspended all listeners and will no longer accept client connections"),
+    {ok, NConnections} = close_all_client_connections(),
+    %% allow plugins to react e.g. by closing their protocol connections
+    rabbit_event:notify(maintenance_connections_closed, #{
+        reason => <<"node is being put into maintenance">>
+    }),
+    rabbit_log:alert("Closed ~b local client connections", [NConnections]),
+
+    TransferCandidates = primary_replica_transfer_candidate_nodes(),
+    ReadableCandidates = readable_candidate_list(TransferCandidates),
+    rabbit_log:info("Node will transfer primary replicas of its queues to ~b peers: ~s",
+                    [length(TransferCandidates), ReadableCandidates]),
+    transfer_leadership_of_classic_mirrored_queues(TransferCandidates),
+    transfer_leadership_of_quorum_queues(TransferCandidates),
+    stop_local_quorum_queue_followers(),
+
+    %% allow plugins to react
+    rabbit_event:notify(maintenance_draining, #{
+        reason => <<"node is being put into maintenance">>
+    }),
+    rabbit_log:alert("Node is ready to be shut down for maintenance or upgrade"),
+
+    ok.
+
+-spec revive() -> ok.
+%% Brings this node back from maintenance mode; a no-op with a warning
+%% when the feature flag is not enabled cluster-wide.
+revive() ->
+    case is_enabled() of
+        true  -> do_revive();
+        false -> rabbit_log:warning("Feature flag `~s` is not enabled, reviving is a no-op", [?FEATURE_FLAG])
+    end.
+
+-spec do_revive() -> ok.
+%% Reverts the effects of do_drain/0: restarts local quorum queue
+%% replicas, resumes client listeners and clears the draining marker.
+%% Fix: the "Resumed all listeners" alert used to be emitted twice, the
+%% first time BEFORE resume_all_client_listeners/0 had actually run; it
+%% is now logged once, after the listeners are resumed.
+do_revive() ->
+    rabbit_log:alert("This node is being revived from maintenance (drain) mode"),
+    revive_local_quorum_queue_replicas(),
+    resume_all_client_listeners(),
+    rabbit_log:alert("Resumed all listeners and will accept client connections again"),
+    unmark_as_being_drained(),
+    rabbit_log:info("Marked this node as back from maintenance and ready to serve clients"),
+
+    %% allow plugins to react
+    rabbit_event:notify(maintenance_revived, #{}),
+
+    ok.
+
+-spec mark_as_being_drained() -> boolean().
+%% Persists the draining marker for this node; true when the write commits.
+mark_as_being_drained() ->
+    rabbit_log:debug("Marking the node as undergoing maintenance"),
+    set_maintenance_status_status(?DRAINING_STATUS).
+
+-spec unmark_as_being_drained() -> boolean().
+%% Resets this node's status to the default; true when the write commits.
+unmark_as_being_drained() ->
+    rabbit_log:debug("Unmarking the node as undergoing maintenance"),
+    set_maintenance_status_status(?DEFAULT_STATUS).
+
+%% Upserts this node's row in ?TABLE inside an mnesia transaction.
+%% Returns true when the transaction committed, false otherwise.
+set_maintenance_status_status(Status) ->
+    Res = mnesia:transaction(
+            fun () ->
+                %% wread takes a write lock so concurrent upserts serialize
+                case mnesia:wread({?TABLE, node()}) of
+                    [] ->
+                        Row = #node_maintenance_state{
+                            node   = node(),
+                            status = Status
+                        },
+                        mnesia:write(?TABLE, Row, write);
+                    [Row0] ->
+                        Row = Row0#node_maintenance_state{
+                            node   = node(),
+                            status = Status
+                        },
+                        mnesia:write(?TABLE, Row, write)
+                end
+            end),
+    case Res of
+        {atomic, ok} -> true;
+        _            -> false
+    end.
+
+
+-spec is_being_drained_local_read(node()) -> boolean().
+%% True when the given node is marked as draining, judged from the local
+%% mnesia replica only (fast, possibly stale).
+is_being_drained_local_read(Node) ->
+    ?DRAINING_STATUS =:= status_local_read(Node).
+
+-spec is_being_drained_consistent_read(node()) -> boolean().
+%% Same as the local-read variant but goes through an mnesia transaction
+%% for a cluster-consistent answer (slower).
+is_being_drained_consistent_read(Node) ->
+    ?DRAINING_STATUS =:= status_consistent_read(Node).
+
+-spec status_local_read(node()) -> maintenance_status().
+%% Reads the node's maintenance status from the local mnesia replica.
+%% Any failure (e.g. the table does not exist yet because the feature
+%% flag is disabled) maps to the default status rather than propagating.
+%% Fix: replaced old-style `case catch Expr` (which conflates exits with
+%% values and loses the stacktrace) with an equivalent try/catch.
+status_local_read(Node) ->
+    try mnesia:dirty_read(?TABLE, Node) of
+        [] -> ?DEFAULT_STATUS;
+        [#node_maintenance_state{node = Node, status = Status}] ->
+            Status;
+        _ -> ?DEFAULT_STATUS
+    catch
+        _:_ -> ?DEFAULT_STATUS
+    end.
+
+-spec status_consistent_read(node()) -> maintenance_status().
+%% Reads the node's maintenance status inside an mnesia transaction; an
+%% aborted transaction or unexpected row shape maps to the default status.
+status_consistent_read(Node) ->
+    case mnesia:transaction(fun() -> mnesia:read(?TABLE, Node) end) of
+        {atomic, []} -> ?DEFAULT_STATUS;
+        {atomic, [#node_maintenance_state{node = Node, status = Status}]} ->
+            Status;
+        {atomic, _} -> ?DEFAULT_STATUS;
+        {aborted, _Reason} -> ?DEFAULT_STATUS
+    end.
+
+-spec filter_out_drained_nodes_local_read([node()]) -> [node()].
+%% Drops nodes currently marked as draining, using fast local reads.
+%% Fix: removed the stray leading space before the -spec attribute,
+%% inconsistent with every other attribute in this module.
+filter_out_drained_nodes_local_read(Nodes) ->
+    lists:filter(fun(N) -> not is_being_drained_local_read(N) end, Nodes).
+
+-spec filter_out_drained_nodes_consistent_read([node()]) -> [node()].
+%% Drops nodes currently marked as draining, using consistent reads.
+filter_out_drained_nodes_consistent_read(Nodes) ->
+    lists:filter(fun(N) -> not is_being_drained_consistent_read(N) end, Nodes).
+
+-spec suspend_all_client_listeners() -> rabbit_types:ok_or_error(any()).
+%% Pauses all listeners on the current node except for
+%% Erlang distribution (clustering and CLI tools).
+%% A suspended listener will not accept any new client connections
+%% but previously established connections won't be interrupted.
+%% Returns 'ok', or an error from one of the ranch suspend calls.
+suspend_all_client_listeners() ->
+    Listeners = rabbit_networking:node_client_listeners(node()),
+    rabbit_log:info("Asked to suspend ~b client connection listeners. "
+                    "No new client connections will be accepted until these listeners are resumed!", [length(Listeners)]),
+    Results = lists:foldl(local_listener_fold_fun(fun ranch:suspend_listener/1), [], Listeners),
+    lists:foldl(fun ok_or_first_error/2, ok, Results).
+
+-spec resume_all_client_listeners() -> rabbit_types:ok_or_error(any()).
+%% Resumes all listeners on the current node except for
+%% Erlang distribution (clustering and CLI tools).
+%% A resumed listener will accept new client connections.
+%% Returns 'ok', or an error from one of the ranch resume calls.
+resume_all_client_listeners() ->
+    Listeners = rabbit_networking:node_client_listeners(node()),
+    rabbit_log:info("Asked to resume ~b client connection listeners. "
+                    "New client connections will be accepted from now on", [length(Listeners)]),
+    Results = lists:foldl(local_listener_fold_fun(fun ranch:resume_listener/1), [], Listeners),
+    lists:foldl(fun ok_or_first_error/2, ok, Results).
+
+-spec close_all_client_connections() -> {'ok', non_neg_integer()}.
+%% Force-closes every client connection local to this node and reports
+%% how many connections were asked to close.
+close_all_client_connections() ->
+    Connections = rabbit_networking:local_connections(),
+    rabbit_networking:close_connections(Connections, "Node was put into maintenance mode"),
+    {ok, length(Connections)}.
+
+-spec transfer_leadership_of_quorum_queues([node()]) -> ok.
+%% Triggers a leader election for every quorum queue whose leader lives
+%% on this node by stopping its local Ra server, which excludes this node
+%% from the set of election candidates.
+%% Fix: the nodedown error log had a ~p control sequence but no argument
+%% list, which would make the log call itself fail.
+transfer_leadership_of_quorum_queues([]) ->
+    rabbit_log:warning("Skipping leadership transfer of quorum queues: no candidate "
+                       "(online, not under maintenance) nodes to transfer to!");
+transfer_leadership_of_quorum_queues(_TransferCandidates) ->
+    %% we only transfer leadership for QQs that have local leaders
+    Queues = rabbit_amqqueue:list_local_leaders(),
+    rabbit_log:info("Will transfer leadership of ~b quorum queues with current leader on this node",
+                    [length(Queues)]),
+    [begin
+         Name = amqqueue:get_name(Q),
+         rabbit_log:debug("Will trigger a leader election for local quorum queue ~s",
+                          [rabbit_misc:rs(Name)]),
+         %% we trigger an election and exclude this node from the list of candidates
+         %% by simply shutting its local QQ replica (Ra server)
+         RaLeader = amqqueue:get_pid(Q),
+         rabbit_log:debug("Will stop Ra server ~p", [RaLeader]),
+         case ra:stop_server(RaLeader) of
+             ok ->
+                 rabbit_log:debug("Successfully stopped Ra server ~p", [RaLeader]);
+             {error, nodedown} ->
+                 rabbit_log:error("Failed to stop Ra server ~p: target node was reported as down",
+                                  [RaLeader])
+         end
+     end || Q <- Queues],
+    rabbit_log:info("Leadership transfer for quorum queues hosted on this node has been initiated").
+
+-spec transfer_leadership_of_classic_mirrored_queues([node()]) -> ok.
+%% Moves the master of every locally-mirrored classic queue to a randomly
+%% picked candidate node.
+%% Fixes: (1) the "no suitable candidates" warning fed the raw queue name
+%% record to a ~s control sequence; it is now rendered with
+%% rabbit_misc:rs/1 like every other log line here. (2) removed a stray
+%% leading space before the first clause head.
+transfer_leadership_of_classic_mirrored_queues([]) ->
+    rabbit_log:warning("Skipping leadership transfer of classic mirrored queues: no candidate "
+                       "(online, not under maintenance) nodes to transfer to!");
+transfer_leadership_of_classic_mirrored_queues(TransferCandidates) ->
+    Queues = rabbit_amqqueue:list_local_mirrored_classic_queues(),
+    ReadableCandidates = readable_candidate_list(TransferCandidates),
+    rabbit_log:info("Will transfer leadership of ~b classic mirrored queues hosted on this node to these peer nodes: ~s",
+                    [length(Queues), ReadableCandidates]),
+
+    [begin
+         Name = amqqueue:get_name(Q),
+         case random_primary_replica_transfer_candidate_node(TransferCandidates) of
+             {ok, Pick} ->
+                 rabbit_log:debug("Will transfer leadership of local queue ~s to node ~s",
+                                  [rabbit_misc:rs(Name), Pick]),
+                 case rabbit_mirror_queue_misc:transfer_leadership(Q, Pick) of
+                     {migrated, _} ->
+                         rabbit_log:debug("Successfully transferred leadership of queue ~s to node ~s",
+                                          [rabbit_misc:rs(Name), Pick]);
+                     Other ->
+                         rabbit_log:warning("Could not transfer leadership of queue ~s to node ~s: ~p",
+                                            [rabbit_misc:rs(Name), Pick, Other])
+                 end;
+             undefined ->
+                 rabbit_log:warning("Could not transfer leadership of queue ~s: no suitable candidates?",
+                                    [rabbit_misc:rs(Name)])
+         end
+     end || Q <- Queues],
+    rabbit_log:info("Leadership transfer for local classic mirrored queues is complete").
+
+-spec stop_local_quorum_queue_followers() -> ok.
+%% Stops the local follower Ra server of every quorum queue so this node
+%% is not considered in leader elections while under maintenance.
+%% Fix: the nodedown error log had a ~p control sequence but no argument
+%% list, which would make the log call itself fail.
+stop_local_quorum_queue_followers() ->
+    Queues = rabbit_amqqueue:list_local_followers(),
+    rabbit_log:info("Will stop local follower replicas of ~b quorum queues on this node",
+                    [length(Queues)]),
+    [begin
+         Name = amqqueue:get_name(Q),
+         rabbit_log:debug("Will stop a local follower replica of quorum queue ~s",
+                          [rabbit_misc:rs(Name)]),
+         %% shut down Ra nodes so that they are not considered for leader election
+         {RegisteredName, _LeaderNode} = amqqueue:get_pid(Q),
+         RaNode = {RegisteredName, node()},
+         rabbit_log:debug("Will stop Ra server ~p", [RaNode]),
+         case ra:stop_server(RaNode) of
+             ok ->
+                 rabbit_log:debug("Successfully stopped Ra server ~p", [RaNode]);
+             {error, nodedown} ->
+                 rabbit_log:error("Failed to stop Ra server ~p: target node was reported as down",
+                                  [RaNode])
+         end
+     end || Q <- Queues],
+    rabbit_log:info("Stopped all local replicas of quorum queues hosted on this node").
+
+-spec primary_replica_transfer_candidate_nodes() -> [node()].
+%% Cluster members that can take over primary replicas: every other
+%% running node that is not itself being drained (consistent read).
+%% Fix: removed the stray leading space before the -spec attribute.
+primary_replica_transfer_candidate_nodes() ->
+    filter_out_drained_nodes_consistent_read(rabbit_nodes:all_running() -- [node()]).
+
+-spec random_primary_replica_transfer_candidate_node([node()]) -> {ok, node()} | undefined.
+%% Picks one candidate node pseudo-randomly, or 'undefined' for an empty list.
+random_primary_replica_transfer_candidate_node([]) ->
+    undefined;
+random_primary_replica_transfer_candidate_node(Candidates) ->
+    %% NOTE(review): phash2 of monotonic_time is a cheap pseudo-random
+    %% pick, not uniformly random; rand:uniform/1 would be the idiomatic
+    %% choice if distribution quality matters here - confirm intent.
+    Nth = erlang:phash2(erlang:monotonic_time(), length(Candidates)),
+    Candidate = lists:nth(Nth + 1, Candidates),
+    {ok, Candidate}.
+
+%% Restarts the local Ra server of every quorum queue this node follows,
+%% making the node eligible for leader elections again after maintenance.
+%% Fix: the nodedown error log had a ~p control sequence but no argument
+%% list, which would make the log call itself fail.
+revive_local_quorum_queue_replicas() ->
+    Queues = rabbit_amqqueue:list_local_followers(),
+    [begin
+         Name = amqqueue:get_name(Q),
+         rabbit_log:debug("Will trigger a leader election for local quorum queue ~s",
+                          [rabbit_misc:rs(Name)]),
+         %% start local QQ replica (Ra server) of this queue
+         {Prefix, _Node} = amqqueue:get_pid(Q),
+         RaServer = {Prefix, node()},
+         rabbit_log:debug("Will start Ra server ~p", [RaServer]),
+         case ra:restart_server(RaServer) of
+             ok ->
+                 rabbit_log:debug("Successfully restarted Ra server ~p", [RaServer]);
+             {error, {already_started, _Pid}} ->
+                 rabbit_log:debug("Ra server ~p is already running", [RaServer]);
+             {error, nodedown} ->
+                 rabbit_log:error("Failed to restart Ra server ~p: target node was reported as down",
+                                  [RaServer])
+         end
+     end || Q <- Queues],
+    rabbit_log:info("Restart of local quorum queue replicas is complete").
+
+%%
+%% Implementation
+%%
+
+%% Returns a fold fun that applies Fun to the ranch ref of every listener
+%% record belonging to this node, accumulating the results; listeners of
+%% other nodes are skipped.
+local_listener_fold_fun(Fun) ->
+    fun(#listener{node = Node, ip_address = Addr, port = Port}, Acc) when Node =:= node() ->
+            RanchRef = rabbit_networking:ranch_ref(Addr, Port),
+            [Fun(RanchRef) | Acc];
+       (_, Acc) ->
+            Acc
+    end.
+
+%% Fold helper: keeps 'ok' while every result is ok, otherwise holds an
+%% error. NOTE(review): each {error, _} overwrites the accumulator, so
+%% the value kept is the LAST error in fold order - confirm the "first"
+%% in the name is intended given the results list is built in reverse.
+ok_or_first_error(ok, Acc) ->
+    Acc;
+ok_or_first_error({error, _} = Err, _Acc) ->
+    Err.
+
+%% Renders a node list as a human-readable, comma-separated string.
+readable_candidate_list(Nodes) ->
+    AsStrings = [rabbit_data_coercion:to_list(N) || N <- Nodes],
+    string:join(AsStrings, ", ").
diff --git a/deps/rabbit/src/rabbit_memory_monitor.erl b/deps/rabbit/src/rabbit_memory_monitor.erl
new file mode 100644
index 0000000000..5934a97cff
--- /dev/null
+++ b/deps/rabbit/src/rabbit_memory_monitor.erl
@@ -0,0 +1,259 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+
+%% This module handles the node-wide memory statistics.
+%% It receives statistics from all queues, counts the desired
+%% queue length (in seconds), and sends this information back to
+%% queues.
+
+-module(rabbit_memory_monitor).
+
+-behaviour(gen_server2).
+
+-export([start_link/0, register/2, deregister/1,
+ report_ram_duration/2, stop/0, conserve_resources/3, memory_use/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-record(process, {pid, reported, sent, callback, monitor}).
+
+-record(state, {timer, %% 'internal_update' timer
+ queue_durations, %% ets #process
+ queue_duration_sum, %% sum of all queue_durations
+ queue_duration_count, %% number of elements in sum
+ desired_duration, %% the desired queue duration
+ disk_alarm %% disable paging, disk alarm has fired
+ }).
+
+-define(SERVER, ?MODULE).
+-define(TABLE_NAME, ?MODULE).
+
+%% If all queues are pushed to disk (duration 0), then the sum of
+%% their reported lengths will be 0. If memory then becomes available,
+%% unless we manually intervene, the sum will remain 0, and the queues
+%% will never get a non-zero duration. Thus when the mem use is <
+%% SUM_INC_THRESHOLD, increase the sum artificially by SUM_INC_AMOUNT.
+-define(SUM_INC_THRESHOLD, 0.95).
+-define(SUM_INC_AMOUNT, 1.0).
+
+-define(EPSILON, 0.000001). %% less than this and we clamp to 0
+
+%%----------------------------------------------------------------------------
+%% Public API
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+%% Starts the singleton memory monitor registered as ?SERVER.
+start_link() ->
+    gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []).
+
+-spec register(pid(), {atom(),atom(),[any()]}) -> 'ok'.
+
+%% Registers a queue process; MFA is the callback invoked (with the new
+%% desired duration appended to A) whenever the target duration changes.
+register(Pid, MFA = {_M, _F, _A}) ->
+    gen_server2:call(?SERVER, {register, Pid, MFA}, infinity).
+
+-spec deregister(pid()) -> 'ok'.
+
+%% Removes a queue process from monitoring (fire-and-forget).
+deregister(Pid) ->
+    gen_server2:cast(?SERVER, {deregister, Pid}).
+
+-spec report_ram_duration
+        (pid(), float() | 'infinity') -> number() | 'infinity'.
+
+%% A queue reports its measured RAM duration; the reply is the duration
+%% the monitor currently wants that queue to target.
+report_ram_duration(Pid, QueueDuration) ->
+    gen_server2:call(?SERVER,
+                     {report_ram_duration, Pid, QueueDuration}, infinity).
+
+-spec stop() -> 'ok'.
+
+stop() ->
+    gen_server2:cast(?SERVER, stop).
+
+%% Paging should be enabled/disabled only in response to disk resource alarms
+%% for the current node.
+conserve_resources(Pid, disk, {_, Conserve, Node}) when node(Pid) =:= Node ->
+    gen_server2:cast(Pid, {disk_alarm, Conserve});
+conserve_resources(_Pid, _Source, _Conserve) ->
+    ok.
+
+%% Thin pass-through to the VM memory monitor.
+memory_use(Type) ->
+    vm_memory_monitor:get_memory_use(Type).
+
+%%----------------------------------------------------------------------------
+%% Gen_server callbacks
+%%----------------------------------------------------------------------------
+
+%% Starts the periodic 'update' timer, creates the private durations
+%% table and registers for resource alarms; the initial desired duration
+%% is computed immediately via internal_update/1.
+init([]) ->
+    {ok, Interval} = application:get_env(rabbit, memory_monitor_interval),
+    {ok, TRef} = timer:send_interval(Interval, update),
+
+    Ets = ets:new(?TABLE_NAME, [set, private, {keypos, #process.pid}]),
+    %% rabbit_alarm:register/2 returns the list of currently active alarms
+    Alarms = rabbit_alarm:register(self(), {?MODULE, conserve_resources, []}),
+    {ok, internal_update(
+           #state { timer                = TRef,
+                    queue_durations      = Ets,
+                    queue_duration_sum   = 0.0,
+                    queue_duration_count = 0,
+                    desired_duration     = infinity,
+                    disk_alarm           = lists:member(disk, Alarms)})}.
+
+%% A queue reports its current RAM duration; reply early with the
+%% duration we currently want it to target, then fold its report into
+%% the running sum/count used to compute the next desired duration.
+handle_call({report_ram_duration, Pid, QueueDuration}, From,
+            State = #state { queue_duration_sum = Sum,
+                             queue_duration_count = Count,
+                             queue_durations = Durations,
+                             desired_duration = SendDuration }) ->
+
+    [Proc = #process { reported = PrevQueueDuration }] =
+        ets:lookup(Durations, Pid),
+
+    %% reply before updating the table so the caller is not kept waiting
+    gen_server2:reply(From, SendDuration),
+
+    %% 'infinity' reports are excluded from the sum and the count; the
+    %% four cases handle a queue moving into or out of 'infinity'
+    {Sum1, Count1} =
+        case {PrevQueueDuration, QueueDuration} of
+            {infinity, infinity} -> {Sum, Count};
+            {infinity, _}        -> {Sum + QueueDuration, Count + 1};
+            {_, infinity}        -> {Sum - PrevQueueDuration, Count - 1};
+            {_, _}               -> {Sum - PrevQueueDuration + QueueDuration,
+                                     Count}
+        end,
+    true = ets:insert(Durations, Proc #process { reported = QueueDuration,
+                                                 sent = SendDuration }),
+    {noreply, State #state { queue_duration_sum = zero_clamp(Sum1),
+                             queue_duration_count = Count1 }};
+
+handle_call({register, Pid, MFA}, _From,
+ State = #state { queue_durations = Durations }) ->
+ MRef = erlang:monitor(process, Pid),
+ true = ets:insert(Durations, #process { pid = Pid, reported = infinity,
+ sent = infinity, callback = MFA,
+ monitor = MRef }),
+ {reply, ok, State};
+
+handle_call(_Request, _From, State) ->
+ {noreply, State}.
+
+handle_cast({disk_alarm, Alarm}, State = #state{disk_alarm = Alarm}) ->
+ {noreply, State};
+
+handle_cast({disk_alarm, Alarm}, State) ->
+ {noreply, internal_update(State#state{disk_alarm = Alarm})};
+
+handle_cast({deregister, Pid}, State) ->
+ {noreply, internal_deregister(Pid, true, State)};
+
+handle_cast(stop, State) ->
+ {stop, normal, State};
+
+handle_cast(_Request, State) ->
+ {noreply, State}.
+
+handle_info(update, State) ->
+ {noreply, internal_update(State)};
+
+handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) ->
+ {noreply, internal_deregister(Pid, false, State)};
+
+handle_info(_Info, State) ->
+ {noreply, State}.
+
+terminate(_Reason, #state { timer = TRef }) ->
+ timer:cancel(TRef),
+ ok.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+
+%%----------------------------------------------------------------------------
+%% Internal functions
+%%----------------------------------------------------------------------------
+
+%% Clamp tiny floating point residue (below ?EPSILON) to exactly 0.0 so
+%% the running sum cannot drift.
+zero_clamp(Sum) ->
+    case Sum < ?EPSILON of
+        true  -> 0.0;
+        false -> Sum
+    end.
+
+%% Removes Pid from the durations table, optionally demonitoring it, and
+%% subtracts its last finite report from the running sum and count.
+%% Demonitor is false when called from a 'DOWN' message, where the
+%% monitor is already gone.
+internal_deregister(Pid, Demonitor,
+                    State = #state { queue_duration_sum = Sum,
+                                     queue_duration_count = Count,
+                                     queue_durations = Durations }) ->
+    case ets:lookup(Durations, Pid) of
+        [] -> State;
+        [#process { reported = PrevQueueDuration, monitor = MRef }] ->
+            true = case Demonitor of
+                       true  -> erlang:demonitor(MRef);
+                       false -> true
+                   end,
+            %% 'infinity' reports were never part of the sum/count
+            {Sum1, Count1} =
+                case PrevQueueDuration of
+                    infinity -> {Sum, Count};
+                    _        -> {zero_clamp(Sum - PrevQueueDuration),
+                                 Count - 1}
+                end,
+            true = ets:delete(Durations, Pid),
+            State #state { queue_duration_sum = Sum1,
+                           queue_duration_count = Count1 }
+    end.
+
+%% Recomputes the desired duration and, when it changed in the direction
+%% that matters (down normally, up while a disk alarm is active),
+%% broadcasts the new value to all registered queues.
+internal_update(State = #state{queue_durations = Durations,
+                               desired_duration = DesiredDurationAvg,
+                               disk_alarm = DiskAlarm}) ->
+    DesiredDurationAvg1 = desired_duration_average(State),
+    ShouldInform = should_inform_predicate(DiskAlarm),
+    case ShouldInform(DesiredDurationAvg, DesiredDurationAvg1) of
+        true  -> inform_queues(ShouldInform, DesiredDurationAvg1, Durations);
+        false -> ok
+    end,
+    State#state{desired_duration = DesiredDurationAvg1}.
+
+%% Desired per-queue RAM duration for the next interval. Returns
+%% 'infinity' (no paging) while a disk alarm is active, while memory use
+%% is below the paging threshold, or while no queue has reported a
+%% finite duration yet.
+desired_duration_average(#state{disk_alarm = true}) ->
+    infinity;
+desired_duration_average(#state{disk_alarm = false,
+                                queue_duration_sum = Sum,
+                                queue_duration_count = Count}) ->
+    {ok, LimitThreshold} =
+        application:get_env(rabbit, vm_memory_high_watermark_paging_ratio),
+    MemoryRatio = memory_use(ratio),
+    if MemoryRatio =:= infinity ->
+           0.0;
+       MemoryRatio < LimitThreshold orelse Count == 0 ->
+           infinity;
+       MemoryRatio < ?SUM_INC_THRESHOLD ->
+           %% see the ?SUM_INC_THRESHOLD comment above: nudge the sum up
+           %% so queues can recover from an all-paged-out (sum == 0) state
+           ((Sum + ?SUM_INC_AMOUNT) / Count) / MemoryRatio;
+       true ->
+           (Sum / Count) / MemoryRatio
+    end.
+
+%% Pushes the new desired duration to every registered queue whose last
+%% sent and last reported durations both satisfy ShouldInform, recording
+%% what was sent. The fold threads 'true' as its accumulator so a failed
+%% callback crashes this server instead of being silently ignored.
+inform_queues(ShouldInform, DesiredDurationAvg, Durations) ->
+    true =
+        ets:foldl(
+          fun (Proc = #process{reported = QueueDuration,
+                               sent = PrevSendDuration,
+                               callback = {M, F, A}}, true) ->
+                  case ShouldInform(PrevSendDuration, DesiredDurationAvg)
+                      andalso ShouldInform(QueueDuration, DesiredDurationAvg) of
+                      true  -> ok = erlang:apply(
+                                      M, F, A ++ [DesiredDurationAvg]),
+                               ets:insert(
+                                 Durations,
+                                 Proc#process{sent = DesiredDurationAvg});
+                      false -> true
+                  end
+          end, true, Durations).
+
+%% In normal use, we only inform queues immediately if the desired
+%% duration has decreased, we want to ensure timely paging.
+should_inform_predicate(false) -> fun greater_than/2;
+%% When the disk alarm has gone off though, we want to inform queues
+%% immediately if the desired duration has *increased* - we want to
+%% ensure timely stopping paging.
+should_inform_predicate(true) -> fun (D1, D2) -> greater_than(D2, D1) end.
+
+%% 'infinity'-aware strict comparison: infinity is greater than any
+%% finite duration and not greater than itself.
+greater_than(infinity, infinity) -> false;
+greater_than(_D1, infinity)      -> false;
+greater_than(infinity, _D2)      -> true;
+greater_than(D1, D2)             -> D1 > D2.
diff --git a/deps/rabbit/src/rabbit_metrics.erl b/deps/rabbit/src/rabbit_metrics.erl
new file mode 100644
index 0000000000..10418e3884
--- /dev/null
+++ b/deps/rabbit/src/rabbit_metrics.erl
@@ -0,0 +1,45 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_metrics).
+
+-behaviour(gen_server).
+
+-export([start_link/0]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+-define(SERVER, ?MODULE).
+
+%%----------------------------------------------------------------------------
+%% Starts the raw metrics storage and owns the ETS tables.
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+start_link() ->
+    gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
+
+%% Initialises the core metrics ETS tables; this process exists solely
+%% to own them, so its state is the atom 'none'.
+init([]) ->
+    rabbit_core_metrics:init(),
+    {ok, none}.
+
+%% NOTE(review): unknown calls get {noreply, State}, which leaves the
+%% caller blocked until its gen_server:call timeout - confirm intended.
+handle_call(_Request, _From, State) ->
+    {noreply, State}.
+
+handle_cast(_Request, State) ->
+    {noreply, State}.
+
+handle_info(_Msg, State) ->
+    {noreply, State}.
+
+terminate(_Reason, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl b/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl
new file mode 100644
index 0000000000..91a7c3ddc8
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl
@@ -0,0 +1,460 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_coordinator).
+
+-export([start_link/4, get_gm/1, ensure_monitoring/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3, handle_pre_hibernate/1]).
+
+-export([joined/2, members_changed/3, handle_msg/3, handle_terminate/2]).
+
+-behaviour(gen_server2).
+-behaviour(gm).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+-include("gm_specs.hrl").
+
+-record(state, { q,
+ gm,
+ monitors,
+ death_fun,
+ depth_fun
+ }).
+
+%%----------------------------------------------------------------------------
+%%
+%% Mirror Queues
+%%
+%% A queue with mirrors consists of the following:
+%%
+%% #amqqueue{ pid, slave_pids }
+%% | |
+%% +----------+ +-------+--------------+-----------...etc...
+%% | | |
+%% V V V
+%% amqqueue_process---+ mirror-----+ mirror-----+ ...etc...
+%% | BQ = master----+ | | BQ = vq | | BQ = vq |
+%% | | BQ = vq | | +-+-------+ +-+-------+
+%% | +-+-------+ | | |
+%% +-++-----|---------+ | | (some details elided)
+%% || | | |
+%% || coordinator-+ | |
+%% || +-+---------+ | |
+%% || | | |
+%% || gm-+ -- -- -- -- gm-+- -- -- -- gm-+- -- --...etc...
+%% || +--+ +--+ +--+
+%% ||
+%% consumers
+%%
+%% The master is merely an implementation of bq, and thus is invoked
+%% through the normal bq interface by the amqqueue_process. The mirrors
+%% meanwhile are processes in their own right (as is the
+%% coordinator). The coordinator and all mirrors belong to the same gm
+%% group. Every member of a gm group receives messages sent to the gm
+%% group. Because the master is the bq of amqqueue_process, it doesn't
+%% have sole control over its mailbox, and as a result, the master
+%% itself cannot be passed messages directly (well, it could by via
+%% the amqqueue:run_backing_queue callback but that would induce
+%% additional unnecessary loading on the master queue process), yet it
+%% needs to react to gm events, such as the death of mirrors. Thus the
+%% master creates the coordinator, and it is the coordinator that is
+%% the gm callback module and event handler for the master.
+%%
+%% Consumers are only attached to the master. Thus the master is
+%% responsible for informing all mirrors when messages are fetched from
+%% the bq, when they're acked, and when they're requeued.
+%%
+%% The basic goal is to ensure that all mirrors performs actions on
+%% their bqs in the same order as the master. Thus the master
+%% intercepts all events going to its bq, and suitably broadcasts
+%% these events on the gm. The mirrors thus receive two streams of
+%% events: one stream is via the gm, and one stream is from channels
+%% directly. Whilst the stream via gm is guaranteed to be consistently
+%% seen by all mirrors, the same is not true of the stream via
+%% channels. For example, in the event of an unexpected death of a
+%% channel during a publish, only some of the mirrors may receive that
+%% publish. As a result of this problem, the messages broadcast over
+%% the gm contain published content, and thus mirrors can operate
+%% successfully on messages that they only receive via the gm.
+%%
+%% The key purpose of also sending messages directly from the channels
+%% to the mirrors is that without this, in the event of the death of
+%% the master, messages could be lost until a suitable mirror is
+%% promoted. However, that is not the only reason. A mirror cannot send
+%% confirms for a message until it has seen it from the
+%% channel. Otherwise, it might send a confirm to a channel for a
+%% message that it might *never* receive from that channel. This can
+%% happen because new mirrors join the gm ring (and thus receive
+%% messages from the master) before inserting themselves in the
+%% queue's mnesia record (which is what channels look at for routing).
+%% As it turns out, channels will simply ignore such bogus confirms,
+%% but relying on that would introduce a dangerously tight coupling.
+%%
+%% Hence the mirrors have to wait until they've seen both the publish
+%% via gm, and the publish via the channel before they issue the
+%% confirm. Either form of publish can arrive first, and a mirror can
+%% be upgraded to the master at any point during this
+%% process. Confirms continue to be issued correctly, however.
+%%
+%% Because the mirror is a full process, it impersonates parts of the
+%% amqqueue API. However, it does not need to implement all parts: for
+%% example, no ack or consumer-related message can arrive directly at
+%% a mirror from a channel: it is only publishes that pass both
+%% directly to the mirrors and go via gm.
+%%
+%% Mirrors can be added dynamically. When this occurs, there is no
+%% attempt made to sync the current contents of the master with the
+%% new mirror, thus the mirror will start empty, regardless of the state
+%% of the master. Thus the mirror needs to be able to detect and ignore
+%% operations which are for messages it has not received: because of
+%% the strict FIFO nature of queues in general, this is
+%% straightforward - all new publishes that the new mirror receives via
+%% gm should be processed as normal, but fetches which are for
+%% messages the mirror has never seen should be ignored. Similarly,
+%% acks for messages the mirror never fetched should be
+%% ignored. Similarly, we don't republish rejected messages that we
+%% haven't seen. Eventually, as the master is consumed from, the
+%% messages at the head of the queue which were there before the mirror
+%% joined will disappear, and the mirror will become fully synced with
+%% the state of the master.
+%%
+%% The detection of the sync-status is based on the depth of the BQs,
+%% where the depth is defined as the sum of the length of the BQ (as
+%% per BQ:len) and the messages pending an acknowledgement. When the
+%% depth of the mirror is equal to the master's, then the mirror is
+%% synchronised. We only store the difference between the two for
+%% simplicity. Comparing the length is not enough since we need to
+%% take into account rejected messages which will make it back into
+%% the master queue but can't go back in the mirror, since we don't
+%% want "holes" in the mirror queue. Note that the depth, and the
+%% length likewise, must always be shorter on the mirror - we assert
+%% that in various places. In case mirrors are joined to an empty queue
+%% which only goes on to receive publishes, they start by asking the
+%% master to broadcast its depth. This is enough for mirrors to always
+%% be able to work out when their head does not differ from the master
+%% (and is much simpler and cheaper than getting the master to hang on
+%% to the guid of the msg at the head of its queue). When a mirror is
+%% promoted to a master, it unilaterally broadcasts its depth, in
+%% order to solve the problem of depth requests from new mirrors being
+%% unanswered by a dead master.
+%%
+%% Obviously, due to the async nature of communication across gm, the
+%% mirrors can fall behind. This does not matter from a sync pov: if
+%% they fall behind and the master dies then a) no publishes are lost
+%% because all publishes go to all mirrors anyway; b) the worst that
+%% happens is that acks get lost and so messages come back to
+%% life. This is no worse than normal given you never get confirmation
+%% that an ack has been received (not quite true with QoS-prefetch,
+%% but close enough for jazz).
+%%
+%% Because acktags are issued by the bq independently, and because
+%% there is no requirement for the master and all mirrors to use the
+%% same bq, all references to msgs going over gm is by msg_id. Thus
+%% upon acking, the master must convert the acktags back to msg_ids
+%% (which happens to be what bq:ack returns), then sends the msg_ids
+%% over gm, the mirrors must convert the msg_ids to acktags (a mapping
+%% the mirrors themselves must maintain).
+%%
+%% When the master dies, a mirror gets promoted. This will be the
+%% eldest mirror, and thus the hope is that that mirror is most likely
+%% to be sync'd with the master. The design of gm is that the
+%% notification of the death of the master will only appear once all
+%% messages in-flight from the master have been fully delivered to all
+%% members of the gm group. Thus at this point, the mirror that gets
+%% promoted cannot broadcast different events in a different order
+%% than the master for the same msgs: there is no possibility for the
+%% same msg to be processed by the old master and the new master - if
+%% it was processed by the old master then it will have been processed
+%% by the mirror before the mirror was promoted, and vice versa.
+%%
+%% Upon promotion, all msgs pending acks are requeued as normal, the
+%% mirror constructs state suitable for use in the master module, and
+%% then dynamically changes into an amqqueue_process with the master
+%% as the bq, and the slave's bq as the master's bq. Thus the very
+%% same process that was the mirror is now a full amqqueue_process.
+%%
+%% It is important that we avoid memory leaks due to the death of
+%% senders (i.e. channels) and partial publications. A sender
+%% publishing a message may fail mid way through the publish and thus
+%% only some of the mirrors will receive the message. We need the
+%% mirrors to be able to detect this and tidy up as necessary to avoid
+%% leaks. If we just had the master monitoring all senders then we
+%% would have the possibility that a sender appears and only sends the
+%% message to a few of the mirrors before dying. Those mirrors would
+%% then hold on to the message, assuming they'll receive some
+%% instruction eventually from the master. Thus we have both mirrors
+%% and the master monitor all senders they become aware of. But there
+%% is a race: if the mirror receives a DOWN of a sender, how does it
+%% know whether or not the master is going to send it instructions
+%% regarding those messages?
+%%
+%% Whilst the master monitors senders, it can't access its mailbox
+%% directly, so it delegates monitoring to the coordinator. When the
+%% coordinator receives a DOWN message from a sender, it informs the
+%% master via a callback. This allows the master to do any tidying
+%% necessary, but more importantly allows the master to broadcast a
+%% sender_death message to all the mirrors, saying the sender has
+%% died. Once the mirrors receive the sender_death message, they know
+%% that they're not going to receive any more instructions from the gm
+%% regarding that sender. However, it is possible that the coordinator
+%% receives the DOWN and communicates that to the master before the
+%% master has finished receiving and processing publishes from the
+%% sender. This turns out not to be a problem: the sender has actually
+%% died, and so will not need to receive confirms or other feedback,
+%% and should further messages be "received" from the sender, the
+%% master will ask the coordinator to set up a new monitor, and
+%% will continue to process the messages normally. Slaves may thus
+%% receive publishes via gm from previously declared "dead" senders,
+%% but again, this is fine: should the mirror have just thrown out the
+%% message it had received directly from the sender (due to receiving
+%% a sender_death message via gm), it will be able to cope with the
+%% publication purely from the master via gm.
+%%
+%% When a mirror receives a DOWN message for a sender, if it has not
+%% received the sender_death message from the master via gm already,
+%% then it will wait 20 seconds before broadcasting a request for
+%% confirmation from the master that the sender really has died.
+%% Should a sender have only sent a publish to mirrors, this allows
+%% mirrors to inform the master of the previous existence of the
+%% sender. The master will thus monitor the sender, receive the DOWN,
+%% and subsequently broadcast the sender_death message, allowing the
+%% mirrors to tidy up. This process can repeat for the same sender:
+%% consider one mirror receives the publication, then the DOWN, then
+%% asks for confirmation of death, then the master broadcasts the
+%% sender_death message. Only then does another mirror receive the
+%% publication and thus set up its monitoring. Eventually that mirror
+%% too will receive the DOWN, ask for confirmation and the master will
+%% monitor the sender again, receive another DOWN, and send out
+%% another sender_death message. Given the 20 second delay before
+%% requesting death confirmation, this is highly unlikely, but it is a
+%% possibility.
+%%
+%% When the 20 second timer expires, the mirror first checks to see
+%% whether it still needs confirmation of the death before requesting
+%% it. This prevents unnecessary traffic on gm as it allows one
+%% broadcast of the sender_death message to satisfy many mirrors.
+%%
+%% If we consider the promotion of a mirror at this point, we have two
+%% possibilities: that of the mirror that has received the DOWN and is
+%% thus waiting for confirmation from the master that the sender
+%% really is down; and that of the mirror that has not received the
+%% DOWN. In the first case, in the act of promotion to master, the new
+%% master will monitor again the dead sender, and after it has
+%% finished promoting itself, it should find another DOWN waiting,
+%% which it will then broadcast. This will allow mirrors to tidy up as
+%% normal. In the second case, we have the possibility that
+%% confirmation-of-sender-death request has been broadcast, but that
+%% it was broadcast before the master failed, and that the mirror being
+%% promoted does not know anything about that sender, and so will not
+%% monitor it on promotion. Thus a mirror that broadcasts such a
+%% request, at the point of broadcasting it, recurses, setting another
+%% 20 second timer. As before, on expiry of the timer, the mirror
+%% checks to see whether it still has not received a sender_death
+%% message for the dead sender, and if not, broadcasts a death
+%% confirmation request. Thus this ensures that even when a master
+%% dies and the new mirror has no knowledge of the dead sender, it will
+%% eventually receive a death confirmation request, shall monitor the
+%% dead sender, receive the DOWN and broadcast the sender_death
+%% message.
+%%
+%% The preceding commentary deals with the possibility of mirrors
+%% receiving publications from senders which the master does not, and
+%% the need to prevent memory leaks in such scenarios. The inverse is
+%% also possible: a partial publication may cause only the master to
+%% receive a publication. It will then publish the message via gm. The
+%% mirrors will receive it via gm, will publish it to their BQ and will
+%% set up monitoring on the sender. They will then receive the DOWN
+%% message and the master will eventually publish the corresponding
+%% sender_death message. The mirror will then be able to tidy up its
+%% state as normal.
+%%
+%% Recovery of mirrored queues is straightforward: as nodes die, the
+%% remaining nodes record this, and eventually a situation is reached
+%% in which only one node is alive, which is the master. This is the
+%% only node which, upon recovery, will resurrect a mirrored queue:
+%% nodes which die and then rejoin as a mirror will start off empty as
+%% if they have no mirrored content at all. This is not surprising: to
+%% achieve anything more sophisticated would require the master and
+%% recovering mirror to be able to check to see whether they agree on
+%% the last seen state of the queue: checking depth alone is not
+%% sufficient in this case.
+%%
+%% For more documentation see the comments in bug 23554.
+%%
+%%----------------------------------------------------------------------------
+
+%% Start a coordinator for the given queue. GM is the pid of an
+%% already-running gm group member to attach to, or 'undefined' to
+%% start (and join) a fresh one in init/1. DeathFun/DepthFun are
+%% callbacks into the master (see rabbit_mirror_queue_master).
+-spec start_link
+        (amqqueue:amqqueue(), pid() | 'undefined',
+         rabbit_mirror_queue_master:death_fun(),
+         rabbit_mirror_queue_master:depth_fun()) ->
+            rabbit_types:ok_pid_or_error().
+
+start_link(Queue, GM, DeathFun, DepthFun) ->
+    gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, DepthFun], []).
+
+%% Synchronously fetch the pid of the coordinator's gm group member.
+-spec get_gm(pid()) -> pid().
+
+get_gm(CPid) ->
+    gen_server2:call(CPid, get_gm, infinity).
+
+%% Ask the coordinator to monitor any of Pids not already monitored
+%% (delegated sender monitoring on behalf of the master).
+-spec ensure_monitoring(pid(), [pid()]) -> 'ok'.
+
+ensure_monitoring(CPid, Pids) ->
+    gen_server2:cast(CPid, {ensure_monitoring, Pids}).
+
+%% ---------------------------------------------------------------------------
+%% gen_server
+%% ---------------------------------------------------------------------------
+
+%% When no gm pid is supplied, start a new gm member for this queue's
+%% group and block until it reports having joined (via joined/2 below,
+%% which sends us {joined, Pid, Members}); otherwise just link to the
+%% existing member.
+init([Q, GM, DeathFun, DepthFun]) when ?is_amqqueue(Q) ->
+    QueueName = amqqueue:get_name(Q),
+    ?store_proc_name(QueueName),
+    GM1 = case GM of
+              undefined ->
+                  {ok, GM2} = gm:start_link(
+                                QueueName, ?MODULE, [self()],
+                                fun rabbit_misc:execute_mnesia_transaction/1),
+                  receive {joined, GM2, _Members} ->
+                          ok
+                  end,
+                  GM2;
+              _ ->
+                  true = link(GM),
+                  GM
+          end,
+    {ok, #state { q          = Q,
+                  gm         = GM1,
+                  monitors   = pmon:new(),
+                  death_fun  = DeathFun,
+                  depth_fun  = DepthFun },
+     hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+%% The only supported call: return our gm member's pid (see get_gm/1).
+handle_call(get_gm, _From, State = #state { gm = GM }) ->
+    reply(GM, State).
+
+%% gm members died; only acted upon while the master still runs on
+%% this node. remove_from_queue/3 updates the queue record and tells
+%% us whether we are still the master.
+handle_cast({gm_deaths, DeadGMPids}, State = #state{q = Q}) when ?amqqueue_pid_runs_on_local_node(Q) ->
+    QueueName = amqqueue:get_name(Q),
+    MPid = amqqueue:get_pid(Q),
+    case rabbit_mirror_queue_misc:remove_from_queue(
+           QueueName, MPid, DeadGMPids) of
+        {ok, MPid, DeadPids, ExtraNodes} ->
+            %% Still the master: report the deaths and start
+            %% replacement mirrors on any suggested extra nodes.
+            rabbit_mirror_queue_misc:report_deaths(MPid, true, QueueName,
+                                                   DeadPids),
+            rabbit_mirror_queue_misc:add_mirrors(QueueName, ExtraNodes, async),
+            noreply(State);
+        {ok, _MPid0, DeadPids, _ExtraNodes} ->
+            %% see rabbitmq-server#914;
+            %% Different mirror is now master, stop current coordinator normally.
+            %% Initiating queue is now mirror and the least we could do is report
+            %% deaths which we 'think' we saw.
+            %% NOTE: Reported deaths here, could be inconsistent.
+            rabbit_mirror_queue_misc:report_deaths(MPid, false, QueueName,
+                                                   DeadPids),
+            {stop, shutdown, State};
+        {error, not_found} ->
+            {stop, normal, State};
+        {error, {not_synced, _}} ->
+            rabbit_log:error("Mirror queue ~p in unexpected state."
+                             " Promoted to master but already a master.",
+                             [QueueName]),
+            error(unexpected_mirrored_state)
+    end;
+
+%% A mirror asked for the master's depth; invoke the depth callback
+%% only if we are (still) the master pid recorded for the queue,
+%% otherwise shut down.
+handle_cast(request_depth, State = #state{depth_fun = DepthFun, q = QArg}) when ?is_amqqueue(QArg) ->
+    QName = amqqueue:get_name(QArg),
+    MPid = amqqueue:get_pid(QArg),
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, QFound} when ?amqqueue_pid_equals(QFound, MPid) ->
+            ok = DepthFun(),
+            noreply(State);
+        _ ->
+            {stop, shutdown, State}
+    end;
+
+%% Monitor any senders we are not yet monitoring (see ensure_monitoring/2).
+handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Mons }) ->
+    noreply(State #state { monitors = pmon:monitor_all(Pids, Mons) });
+
+%% Sent by handle_terminate/2 once gm has drained; a ring_shutdown is
+%% an orderly stop, anything else propagates as the stop reason.
+handle_cast({delete_and_terminate, {shutdown, ring_shutdown}}, State) ->
+    {stop, normal, State};
+handle_cast({delete_and_terminate, Reason}, State) ->
+    {stop, Reason, State}.
+
+%% A monitored sender died: inform the master via the death callback
+%% and forget the monitor. DOWNs for pids we no longer monitor are
+%% ignored.
+handle_info({'DOWN', _MonitorRef, process, Pid, _Reason},
+            State = #state { monitors  = Mons,
+                             death_fun = DeathFun }) ->
+    noreply(case pmon:is_monitored(Pid, Mons) of
+                false -> State;
+                true  -> ok = DeathFun(Pid),
+                         State #state { monitors = pmon:erase(Pid, Mons) }
+            end);
+
+%% Anything else is a protocol violation - crash loudly.
+handle_info(Msg, State) ->
+    {stop, {unexpected_info, Msg}, State}.
+
+%% Nothing to clean up: the linked gm member dies with us.
+terminate(_Reason, #state{}) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+handle_pre_hibernate(State = #state { gm = GM }) ->
+    %% Since GM notifications of deaths are lazy we might not get a
+    %% timely notification of mirror death if policy changes when
+    %% everything is idle. So cause some activity just before we
+    %% sleep. This won't cause us to go into perpetual motion as the
+    %% heartbeat does not wake up coordinator or mirrors.
+    gm:broadcast(GM, hibernate_heartbeat),
+    {hibernate, State}.
+
+%% ---------------------------------------------------------------------------
+%% GM
+%% ---------------------------------------------------------------------------
+
+%% gm callback: our member joined the group. Forward the notification
+%% to the coordinator pid passed in the gm args (init/1 blocks waiting
+%% for this message).
+joined([CPid], Members) ->
+    CPid ! {joined, self(), Members},
+    ok.
+
+%% gm callback: only deaths are of interest; births are ignored.
+members_changed([_CPid], _Births, []) ->
+    ok;
+members_changed([CPid], _Births, Deaths) ->
+    ok = gen_server2:cast(CPid, {gm_deaths, Deaths}).
+
+%% gm callback: forward the messages the coordinator cares about;
+%% everything else is for the mirrors and ignored here.
+handle_msg([CPid], _From, request_depth = Msg) ->
+    ok = gen_server2:cast(CPid, Msg);
+handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) ->
+    ok = gen_server2:cast(CPid, Msg);
+handle_msg([_CPid], _From, {delete_and_terminate, _Reason}) ->
+    %% We tell GM to stop, but we don't instruct the coordinator to
+    %% stop yet. The GM will first make sure all pending messages were
+    %% actually delivered. Then it calls handle_terminate/2 below so the
+    %% coordinator is stopped.
+    %%
+    %% If we stop the coordinator right now, remote mirrors could see the
+    %% coordinator DOWN before delete_and_terminate was delivered to all
+    %% GMs. One of those GM would be promoted as the master, and this GM
+    %% would hang forever, waiting for other GMs to stop.
+    {stop, {shutdown, ring_shutdown}};
+handle_msg([_CPid], _From, _Msg) ->
+    ok.
+
+%% gm callback: gm has fully drained; now stop the coordinator too.
+handle_terminate([CPid], Reason) ->
+    ok = gen_server2:cast(CPid, {delete_and_terminate, Reason}),
+    ok.
+
+%% ---------------------------------------------------------------------------
+%% Others
+%% ---------------------------------------------------------------------------
+
+%% Reply helpers that always request hibernation (this process is idle
+%% most of the time).
+noreply(State) ->
+    {noreply, State, hibernate}.
+
+reply(Reply, State) ->
+    {reply, Reply, State, hibernate}.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_master.erl b/deps/rabbit/src/rabbit_mirror_queue_master.erl
new file mode 100644
index 0000000000..71146e1ce2
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_master.erl
@@ -0,0 +1,578 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_master).
+
+-export([init/3, terminate/2, delete_and_terminate/2,
+ purge/1, purge_acks/1, publish/6, publish_delivered/5,
+ batch_publish/4, batch_publish_delivered/4,
+ discard/4, fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3,
+ len/1, is_empty/1, depth/1, drain_confirmed/1,
+ dropwhile/2, fetchwhile/4, set_ram_duration_target/2, ram_duration/1,
+ needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1,
+ msg_rates/1, info/2, invoke/3, is_duplicate/2, set_queue_mode/2,
+ zip_msgs_and_acks/4, handle_info/2]).
+
+-export([start/2, stop/1, delete_crashed/1]).
+
+-export([promote_backing_queue_state/8, sender_death_fun/0, depth_fun/0]).
+
+-export([init_with_existing_bq/3, stop_mirroring/1, sync_mirrors/3]).
+
+-behaviour(rabbit_backing_queue).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-record(state, { name,
+ gm,
+ coordinator,
+ backing_queue,
+ backing_queue_state,
+ seen_status,
+ confirmed,
+ known_senders,
+ wait_timeout
+ }).
+
+-export_type([death_fun/0, depth_fun/0, stats_fun/0]).
+
+-type death_fun() :: fun ((pid()) -> 'ok').
+-type depth_fun() :: fun (() -> 'ok').
+-type stats_fun() :: fun ((any()) -> 'ok').
+-type master_state() :: #state { name :: rabbit_amqqueue:name(),
+ gm :: pid(),
+ coordinator :: pid(),
+ backing_queue :: atom(),
+ backing_queue_state :: any(),
+ seen_status :: map(),
+ confirmed :: [rabbit_guid:guid()],
+ known_senders :: sets:set()
+ }.
+
+%% For general documentation of HA design, see
+%% rabbit_mirror_queue_coordinator
+
+%% ---------------------------------------------------------------------------
+%% Backing queue
+%% ---------------------------------------------------------------------------
+
+-spec start(_, _) -> no_return().
+start(_Vhost, _DurableQueues) ->
+    %% This will never get called as this module will never be
+    %% installed as the default BQ implementation.
+    exit({not_valid_for_generic_backing_queue, ?MODULE}).
+
+-spec stop(_) -> no_return().
+stop(_Vhost) ->
+    %% Same as start/1.
+    exit({not_valid_for_generic_backing_queue, ?MODULE}).
+
+-spec delete_crashed(_) -> no_return().
+delete_crashed(_QName) ->
+    %% Same as start/1.
+    exit({not_valid_for_generic_backing_queue, ?MODULE}).
+
+%% BQ callback. Wraps the configured backing queue module, then
+%% broadcasts the initial depth so mirrors can establish sync status
+%% (see the commentary in rabbit_mirror_queue_coordinator).
+init(Q, Recover, AsyncCallback) ->
+    {ok, BQ} = application:get_env(backing_queue_module),
+    BQS = BQ:init(Q, Recover, AsyncCallback),
+    State = #state{gm = GM} = init_with_existing_bq(Q, BQ, BQS),
+    ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}),
+    State.
+
+-spec init_with_existing_bq(amqqueue:amqqueue(), atom(), any()) ->
+                                   master_state().
+
+%% Start the coordinator (which starts/joins the gm group), record our
+%% gm pid in the queue's mnesia record, and synchronously start the
+%% suggested mirrors. Throws {coordinator_not_started, _} if the
+%% coordinator fails to start.
+init_with_existing_bq(Q0, BQ, BQS) when ?is_amqqueue(Q0) ->
+    QName = amqqueue:get_name(Q0),
+    case rabbit_mirror_queue_coordinator:start_link(
+           Q0, undefined, sender_death_fun(), depth_fun()) of
+        {ok, CPid} ->
+            GM = rabbit_mirror_queue_coordinator:get_gm(CPid),
+            Self = self(),
+            Fun = fun () ->
+                          [Q1] = mnesia:read({rabbit_queue, QName}),
+                          true = amqqueue:is_amqqueue(Q1),
+                          GMPids0 = amqqueue:get_gm_pids(Q1),
+                          GMPids1 = [{GM, Self} | GMPids0],
+                          Q2 = amqqueue:set_gm_pids(Q1, GMPids1),
+                          Q3 = amqqueue:set_state(Q2, live),
+                          %% amqqueue migration:
+                          %% The amqqueue was read from this transaction, no
+                          %% need to handle migration.
+                          ok = rabbit_amqqueue:store_queue(Q3)
+                  end,
+            ok = rabbit_misc:execute_mnesia_transaction(Fun),
+            {_MNode, SNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q0),
+            %% We need synchronous add here (i.e. do not return until the
+            %% mirror is running) so that when queue declaration is finished
+            %% all mirrors are up; we don't want to end up with unsynced mirrors
+            %% just by declaring a new queue. But add can't be synchronous all
+            %% the time as it can be called by mirrors and that's
+            %% deadlock-prone.
+            rabbit_mirror_queue_misc:add_mirrors(QName, SNodes, sync),
+            #state{name                = QName,
+                   gm                  = GM,
+                   coordinator         = CPid,
+                   backing_queue       = BQ,
+                   backing_queue_state = BQS,
+                   seen_status         = #{},
+                   confirmed           = [],
+                   known_senders       = sets:new(),
+                   wait_timeout        = rabbit_misc:get_env(rabbit, slave_wait_timeout, 15000)};
+        {error, Reason} ->
+            %% The GM can shutdown before the coordinator has started up
+            %% (lost membership or missing group), thus the start_link of
+            %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process
+            %% is trapping exits
+            throw({coordinator_not_started, Reason})
+    end.
+
+-spec stop_mirroring(master_state()) -> {atom(), any()}.
+
+%% Stop all mirrors and detach from the coordinator, handing back the
+%% wrapped backing queue module and state so the queue can continue
+%% unmirrored.
+stop_mirroring(State = #state { coordinator         = CPid,
+                                backing_queue       = BQ,
+                                backing_queue_state = BQS }) ->
+    unlink(CPid),
+    stop_all_slaves(shutdown, State),
+    {BQ, BQS}.
+
+-spec sync_mirrors(stats_fun(), stats_fun(), master_state()) ->
+                          {'ok', master_state()} | {stop, any(), master_state()}.
+
+%% Explicitly synchronise unsynced mirrors: prepare a syncer process,
+%% announce sync_start over gm, then drive the batch transfer via
+%% rabbit_mirror_queue_sync:master_go/8.
+sync_mirrors(HandleInfo, EmitStats,
+             State = #state { name                = QName,
+                              gm                  = GM,
+                              backing_queue       = BQ,
+                              backing_queue_state = BQS }) ->
+    Log = fun (Fmt, Params) ->
+                  rabbit_mirror_queue_misc:log_info(
+                    QName, "Synchronising: " ++ Fmt ++ "~n", Params)
+          end,
+    Log("~p messages to synchronise", [BQ:len(BQS)]),
+    {ok, Q} = rabbit_amqqueue:lookup(QName),
+    SPids = amqqueue:get_slave_pids(Q),
+    SyncBatchSize = rabbit_mirror_queue_misc:sync_batch_size(Q),
+    Log("batch size: ~p", [SyncBatchSize]),
+    Ref = make_ref(),
+    Syncer = rabbit_mirror_queue_sync:master_prepare(Ref, QName, Log, SPids),
+    gm:broadcast(GM, {sync_start, Ref, Syncer, SPids}),
+    S = fun(BQSN) -> State#state{backing_queue_state = BQSN} end,
+    case rabbit_mirror_queue_sync:master_go(
+           Syncer, Ref, Log, HandleInfo, EmitStats, SyncBatchSize, BQ, BQS) of
+        {cancelled, BQS1}    -> Log(" synchronisation cancelled ", []),
+                                {ok, S(BQS1)};
+        {shutdown, R, BQS1}  -> {stop, R, S(BQS1)};
+        {sync_died, R, BQS1} -> Log("~p", [R]),
+                                {ok, S(BQS1)};
+        {already_synced, BQS1} -> {ok, S(BQS1)};
+        {ok, BQS1}           -> Log("complete", []),
+                                {ok, S(BQS1)}
+    end.
+
+terminate({shutdown, dropped} = Reason,
+          State = #state { backing_queue       = BQ,
+                           backing_queue_state = BQS }) ->
+    %% Backing queue termination - this node has been explicitly
+    %% dropped. Normally, non-durable queues would be tidied up on
+    %% startup, but there's a possibility that we will be added back
+    %% in without this node being restarted. Thus we must do the full
+    %% blown delete_and_terminate now, but only locally: we do not
+    %% broadcast delete_and_terminate.
+    State#state{backing_queue_state = BQ:delete_and_terminate(Reason, BQS)};
+
+terminate(Reason,
+          State = #state { name                = QName,
+                           backing_queue       = BQ,
+                           backing_queue_state = BQS }) ->
+    %% Backing queue termination. The queue is going down but
+    %% shouldn't be deleted. Most likely safe shutdown of this
+    %% node.
+    {ok, Q} = rabbit_amqqueue:lookup(QName),
+    SSPids = amqqueue:get_sync_slave_pids(Q),
+    %% If there is no synchronised mirror and the policy does not
+    %% force promotion on shutdown, stop every node to avoid promoting
+    %% an unsynced mirror (which would lose messages).
+    case SSPids =:= [] andalso
+         rabbit_policy:get(<<"ha-promote-on-shutdown">>, Q) =/= <<"always">> of
+        true  -> %% Remove the whole queue to avoid data loss
+                 rabbit_mirror_queue_misc:log_warning(
+                   QName, "Stopping all nodes on master shutdown since no "
+                   "synchronised mirror (replica) is available~n", []),
+                 stop_all_slaves(Reason, State);
+        false -> %% Just let some other mirror take over.
+                 ok
+    end,
+    State #state { backing_queue_state = BQ:terminate(Reason, BQS) }.
+
+%% Full queue deletion: stop every mirror first, then delete the
+%% wrapped backing queue.
+delete_and_terminate(Reason, State = #state { backing_queue       = BQ,
+                                              backing_queue_state = BQS }) ->
+    stop_all_slaves(Reason, State),
+    State#state{backing_queue_state = BQ:delete_and_terminate(Reason, BQS)}.
+
+%% Stop all mirror pids currently recorded for the queue, waiting up
+%% to the configured timeout (see wait_timeout in the state).
+stop_all_slaves(Reason, #state{name = QName, gm = GM, wait_timeout = WT}) ->
+    {ok, Q} = rabbit_amqqueue:lookup(QName),
+    SPids = amqqueue:get_slave_pids(Q),
+    rabbit_mirror_queue_misc:stop_all_slaves(Reason, SPids, QName, GM, WT).
+
+%% Purge locally and tell the mirrors (via a {drop, ...} broadcast) to
+%% drop the same number of messages so they stay in step.
+purge(State = #state { gm                  = GM,
+                       backing_queue       = BQ,
+                       backing_queue_state = BQS }) ->
+    ok = gm:broadcast(GM, {drop, 0, BQ:len(BQS), false}),
+    {Count, BQS1} = BQ:purge(BQS),
+    {Count, State #state { backing_queue_state = BQS1 }}.
+
+%% Not supported for mirrored queues.
+-spec purge_acks(_) -> no_return().
+purge_acks(_State) -> exit({not_implemented, {?MODULE, purge_acks}}).
+
+%% Broadcast the publish over gm (content included, so mirrors can
+%% operate on it even if they never see it from the channel), then
+%% publish locally and ensure the sender is monitored.
+publish(Msg = #basic_message { id = MsgId }, MsgProps, IsDelivered, ChPid, Flow,
+        State = #state { gm                  = GM,
+                         seen_status         = SS,
+                         backing_queue       = BQ,
+                         backing_queue_state = BQS }) ->
+    false = maps:is_key(MsgId, SS), %% ASSERTION
+    ok = gm:broadcast(GM, {publish, ChPid, Flow, MsgProps, Msg},
+                      rabbit_basic:msg_size(Msg)),
+    BQS1 = BQ:publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQS),
+    ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }).
+
+%% Batched variant of publish/6: assert none of the batch was already
+%% seen, compute the total size, broadcast the whole batch once, then
+%% publish locally.
+batch_publish(Publishes, ChPid, Flow,
+              State = #state { gm                  = GM,
+                               seen_status         = SS,
+                               backing_queue       = BQ,
+                               backing_queue_state = BQS }) ->
+    {Publishes1, false, MsgSizes} =
+        lists:foldl(fun ({Msg = #basic_message { id = MsgId },
+                          MsgProps, _IsDelivered}, {Pubs, false, Sizes}) ->
+                            {[{Msg, MsgProps, true} | Pubs], %% [0]
+                             false = maps:is_key(MsgId, SS), %% ASSERTION
+                             Sizes + rabbit_basic:msg_size(Msg)}
+                    end, {[], false, 0}, Publishes),
+    Publishes2 = lists:reverse(Publishes1),
+    ok = gm:broadcast(GM, {batch_publish, ChPid, Flow, Publishes2},
+                      MsgSizes),
+    BQS1 = BQ:batch_publish(Publishes2, ChPid, Flow, BQS),
+    ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }).
+%% [0] When the mirror process handles the publish command, it sets the
+%% IsDelivered flag to true, so to avoid iterating over the messages
+%% again at the mirror, we do it here.
+
+%% Publish a message that goes straight to a consumer (the queue was
+%% empty): broadcast to the mirrors first, then let the backing queue
+%% produce the ack tag.
+publish_delivered(Msg = #basic_message { id = MsgId }, MsgProps,
+                  ChPid, Flow, State = #state { gm                  = GM,
+                                                seen_status         = SS,
+                                                backing_queue       = BQ,
+                                                backing_queue_state = BQS }) ->
+    false = maps:is_key(MsgId, SS), %% ASSERTION
+    ok = gm:broadcast(GM, {publish_delivered, ChPid, Flow, MsgProps, Msg},
+                      rabbit_basic:msg_size(Msg)),
+    {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, Flow, BQS),
+    State1 = State #state { backing_queue_state = BQS1 },
+    {AckTag, ensure_monitoring(ChPid, State1)}.
+
+%% As publish_delivered/5 but for a batch; the fold asserts none of
+%% the messages were seen while we were a mirror and totals their
+%% sizes for the GM broadcast.
+batch_publish_delivered(Publishes, ChPid, Flow,
+                        State = #state { gm                  = GM,
+                                         seen_status         = SS,
+                                         backing_queue       = BQ,
+                                         backing_queue_state = BQS }) ->
+    {false, MsgSizes} =
+        lists:foldl(fun ({Msg = #basic_message { id = MsgId }, _MsgProps},
+                         {false, Sizes}) ->
+                            {false = maps:is_key(MsgId, SS), %% ASSERTION
+                             Sizes + rabbit_basic:msg_size(Msg)}
+                    end, {false, 0}, Publishes),
+    ok = gm:broadcast(GM, {batch_publish_delivered, ChPid, Flow, Publishes},
+                      MsgSizes),
+    {AckTags, BQS1} = BQ:batch_publish_delivered(Publishes, ChPid, Flow, BQS),
+    State1 = State #state { backing_queue_state = BQS1 },
+    {AckTags, ensure_monitoring(ChPid, State1)}.
+
+%% Discard (drop without delivering) a message: tell the mirrors, then
+%% the backing queue, and ensure the sending channel is monitored.
+discard(MsgId, ChPid, Flow, State = #state { gm                  = GM,
+                                             backing_queue       = BQ,
+                                             backing_queue_state = BQS,
+                                             seen_status         = SS }) ->
+    false = maps:is_key(MsgId, SS), %% ASSERTION
+    ok = gm:broadcast(GM, {discard, ChPid, Flow, MsgId}),
+    ensure_monitoring(ChPid,
+                      State #state { backing_queue_state =
+                                         BQ:discard(MsgId, ChPid, Flow, BQS) }).
+
+%% Drop messages from the head of the queue while Pred holds, then
+%% tell the mirrors how many were removed (via drop/3, which compares
+%% the length before and after).
+dropwhile(Pred, State = #state{backing_queue       = BQ,
+                               backing_queue_state = BQS }) ->
+    Len = BQ:len(BQS),
+    {Next, BQS1} = BQ:dropwhile(Pred, BQS),
+    {Next, drop(Len, false, State #state { backing_queue_state = BQS1 })}.
+
+%% Like dropwhile/2 but folds Fun over the fetched messages; fetched
+%% messages become pending acks, hence AckRequired = true in drop/3.
+fetchwhile(Pred, Fun, Acc, State = #state{backing_queue       = BQ,
+                                          backing_queue_state = BQS }) ->
+    Len = BQ:len(BQS),
+    {Next, Acc1, BQS1} = BQ:fetchwhile(Pred, Fun, Acc, BQS),
+    {Next, Acc1, drop(Len, true, State #state { backing_queue_state = BQS1 })}.
+
+%% Collect confirmable msg ids from the backing queue, filtering out
+%% those whose channel publish we have not yet seen (they reached us
+%% via gm while we were a mirror), and append any confirms queued up
+%% earlier by is_duplicate/2.
+drain_confirmed(State = #state { backing_queue       = BQ,
+                                 backing_queue_state = BQS,
+                                 seen_status         = SS,
+                                 confirmed           = Confirmed }) ->
+    {MsgIds, BQS1} = BQ:drain_confirmed(BQS),
+    {MsgIds1, SS1} =
+        lists:foldl(
+          fun (MsgId, {MsgIdsN, SSN}) ->
+                  %% We will never see 'discarded' here
+                  case maps:find(MsgId, SSN) of
+                      error ->
+                          {[MsgId | MsgIdsN], SSN};
+                      {ok, published} ->
+                          %% It was published when we were a mirror,
+                          %% and we were promoted before we saw the
+                          %% publish from the channel. We still
+                          %% haven't seen the channel publish, and
+                          %% consequently we need to filter out the
+                          %% confirm here. We will issue the confirm
+                          %% when we see the publish from the channel.
+                          {MsgIdsN, maps:put(MsgId, confirmed, SSN)};
+                      {ok, confirmed} ->
+                          %% Well, confirms are racy by definition.
+                          {[MsgId | MsgIdsN], SSN}
+                  end
+          end, {[], SS}, MsgIds),
+    {Confirmed ++ MsgIds1, State #state { backing_queue_state = BQS1,
+                                          seen_status         = SS1,
+                                          confirmed           = [] }}.
+
+%% Fetch one message for delivery; if something was fetched, the
+%% mirrors are told to drop one message too (drop_one/2).
+fetch(AckRequired, State = #state { backing_queue       = BQ,
+                                    backing_queue_state = BQS }) ->
+    {Result, BQS1} = BQ:fetch(AckRequired, BQS),
+    State1 = State #state { backing_queue_state = BQS1 },
+    {Result, case Result of
+                 empty -> State1;
+                 {_MsgId, _IsDelivered, _AckTag} -> drop_one(AckRequired, State1)
+             end}.
+
+%% Drop one message from the head; mirrors are told to do the same.
+drop(AckRequired, State = #state { backing_queue       = BQ,
+                                   backing_queue_state = BQS }) ->
+    {Result, BQS1} = BQ:drop(AckRequired, BQS),
+    State1 = State #state { backing_queue_state = BQS1 },
+    {Result, case Result of
+                 empty             -> State1;
+                 {_MsgId, _AckTag} -> drop_one(AckRequired, State1)
+             end}.
+
+%% Acknowledge messages locally and broadcast the acked msg ids to the
+%% mirrors (skipping the broadcast when nothing was acked).
+ack(AckTags, State = #state { gm                  = GM,
+                              backing_queue       = BQ,
+                              backing_queue_state = BQS }) ->
+    {MsgIds, BQS1} = BQ:ack(AckTags, BQS),
+    case MsgIds of
+        [] -> ok;
+        _  -> ok = gm:broadcast(GM, {ack, MsgIds})
+    end,
+    {MsgIds, State #state { backing_queue_state = BQS1 }}.
+
+%% Requeue previously-fetched messages and tell the mirrors which ids
+%% went back to 'ready'.
+requeue(AckTags, State = #state { gm                  = GM,
+                                  backing_queue       = BQ,
+                                  backing_queue_state = BQS }) ->
+    {MsgIds, BQS1} = BQ:requeue(AckTags, BQS),
+    ok = gm:broadcast(GM, {requeue, MsgIds}),
+    {MsgIds, State #state { backing_queue_state = BQS1 }}.
+
+%% Fold MsgFun over the messages identified by AckTags; pure
+%% delegation to the backing queue, no mirror interaction needed.
+ackfold(MsgFun, Acc0, State = #state { backing_queue       = Mod,
+                                       backing_queue_state = ModState }, AckTags) ->
+    {Acc1, ModState1} = Mod:ackfold(MsgFun, Acc0, ModState, AckTags),
+    {Acc1, State #state { backing_queue_state = ModState1 }}.
+
+%% Fold Fun over every message in the queue (delegated).
+fold(Fun, Acc0, State = #state { backing_queue       = Mod,
+                                 backing_queue_state = ModState }) ->
+    {Res, ModState1} = Mod:fold(Fun, Acc0, ModState),
+    {Res, State #state { backing_queue_state = ModState1 }}.
+
+%% Number of messages ready for delivery (delegated).
+len(#state { backing_queue = Mod, backing_queue_state = ModState }) ->
+    Mod:len(ModState).
+
+%% True iff no messages are ready for delivery (delegated).
+is_empty(#state { backing_queue = Mod, backing_queue_state = ModState }) ->
+    Mod:is_empty(ModState).
+
+%% Ready plus unacknowledged message count (delegated).
+depth(#state { backing_queue = Mod, backing_queue_state = ModState }) ->
+    Mod:depth(ModState).
+
+%% The callbacks below are pure delegations to the backing queue; the
+%% mirrors need not be told about any of them.
+
+set_ram_duration_target(Target, State = #state { backing_queue       = BQ,
+                                                 backing_queue_state = BQS }) ->
+    State #state { backing_queue_state =
+                       BQ:set_ram_duration_target(Target, BQS) }.
+
+ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    {Result, BQS1} = BQ:ram_duration(BQS),
+    {Result, State #state { backing_queue_state = BQS1 }}.
+
+needs_timeout(#state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQ:needs_timeout(BQS).
+
+timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    State #state { backing_queue_state = BQ:timeout(BQS) }.
+
+handle_pre_hibernate(State = #state { backing_queue       = BQ,
+                                      backing_queue_state = BQS }) ->
+    State #state { backing_queue_state = BQ:handle_pre_hibernate(BQS) }.
+
+handle_info(Msg, State = #state { backing_queue       = BQ,
+                                  backing_queue_state = BQS }) ->
+    State #state { backing_queue_state = BQ:handle_info(Msg, BQS) }.
+
+resume(State = #state { backing_queue       = BQ,
+                        backing_queue_state = BQS }) ->
+    State #state { backing_queue_state = BQ:resume(BQS) }.
+
+msg_rates(#state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQ:msg_rates(BQS).
+
+%% Augment the backing queue's status with mirroring-specific counters
+%% (seen-status map size and number of known senders).
+info(backing_queue_status,
+     State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQ:info(backing_queue_status, BQS) ++
+        [ {mirror_seen,    maps:size(State #state.seen_status)},
+          {mirror_senders, sets:size(State #state.known_senders)} ];
+info(Item, #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQ:info(Item, BQS).
+
+%% Run a fun against our own state when addressed to this module,
+%% otherwise pass it down to the backing queue.
+invoke(?MODULE, Fun, State) ->
+    Fun(?MODULE, State);
+invoke(Mod, Fun, State = #state { backing_queue       = BQ,
+                                  backing_queue_state = BQS }) ->
+    State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }.
+
+%% Decide whether a message arriving from the channel is one we
+%% already processed while we were a mirror (received via gm), and if
+%% so whether a confirm must now be issued for it.
+is_duplicate(Message = #basic_message { id = MsgId },
+             State = #state { seen_status         = SS,
+                              backing_queue       = BQ,
+                              backing_queue_state = BQS,
+                              confirmed           = Confirmed }) ->
+    %% Here, we need to deal with the possibility that we're about to
+    %% receive a message that we've already seen when we were a mirror
+    %% (we received it via gm). Thus if we do receive such message now
+    %% via the channel, there may be a confirm waiting to issue for
+    %% it.
+
+    %% We will never see {published, ChPid, MsgSeqNo} here.
+    case maps:find(MsgId, SS) of
+        error ->
+            %% We permit the underlying BQ to have a peek at it, but
+            %% only if we ourselves are not filtering out the msg.
+            {Result, BQS1} = BQ:is_duplicate(Message, BQS),
+            {Result, State #state { backing_queue_state = BQS1 }};
+        {ok, published} ->
+            %% It already got published when we were a mirror and no
+            %% confirmation is waiting. amqqueue_process will have, in
+            %% its msg_id_to_channel mapping, the entry for dealing
+            %% with the confirm when that comes back in (it's added
+            %% immediately after calling is_duplicate). The msg is
+            %% invalid. We will not see this again, nor will we be
+            %% further involved in confirming this message, so erase.
+            {{true, drop}, State #state { seen_status = maps:remove(MsgId, SS) }};
+        {ok, Disposition}
+          when Disposition =:= confirmed
+            %% It got published when we were a mirror via gm, and
+            %% confirmed some time after that (maybe even after
+            %% promotion), but before we received the publish from the
+            %% channel, so couldn't previously know what the
+            %% msg_seq_no was (and thus confirm as a mirror). So we
+            %% need to confirm now. As above, amqqueue_process will
+            %% have the entry for the msg_id_to_channel mapping added
+            %% immediately after calling is_duplicate/2.
+            orelse Disposition =:= discarded ->
+            %% Message was discarded while we were a mirror. Confirm now.
+            %% As above, amqqueue_process will have the entry for the
+            %% msg_id_to_channel mapping.
+            {{true, drop}, State #state { seen_status = maps:remove(MsgId, SS),
+                                          confirmed = [MsgId | Confirmed] }}
+    end.
+
+%% Propagate a queue-mode change to the mirrors and then to the local
+%% backing queue.
+set_queue_mode(Mode, State = #state { gm                  = GM,
+                                      backing_queue       = BQ,
+                                      backing_queue_state = BQS }) ->
+    ok = gm:broadcast(GM, {set_queue_mode, Mode}),
+    BQS1 = BQ:set_queue_mode(Mode, BQS),
+    State #state { backing_queue_state = BQS1 }.
+
+%% Pair messages with their ack tags (delegated).
+zip_msgs_and_acks(Msgs, AckTags, Accumulator,
+                  #state { backing_queue       = BQ,
+                           backing_queue_state = BQS }) ->
+    BQ:zip_msgs_and_acks(Msgs, AckTags, Accumulator, BQS).
+
+%% ---------------------------------------------------------------------------
+%% Other exported functions
+%% ---------------------------------------------------------------------------
+
+-spec promote_backing_queue_state
+        (rabbit_amqqueue:name(), pid(), atom(), any(), pid(), [any()],
+         map(), [pid()]) ->
+            master_state().
+
+%% Build a master #state{} from a promoted mirror's data: requeue all
+%% pending acks (afterwards everything must be 'ready', hence the
+%% len == depth assertion), broadcast the new depth to the group, and
+%% carry over the seen-status map and known senders.
+promote_backing_queue_state(QName, CPid, BQ, BQS, GM, AckTags, Seen, KS) ->
+    {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS),
+    Len   = BQ:len(BQS1),
+    Depth = BQ:depth(BQS1),
+    true = Len == Depth, %% ASSERTION: everything must have been requeued
+    ok = gm:broadcast(GM, {depth, Depth}),
+    WaitTimeout = rabbit_misc:get_env(rabbit, slave_wait_timeout, 15000),
+    #state { name                = QName,
+             gm                  = GM,
+             coordinator         = CPid,
+             backing_queue       = BQ,
+             backing_queue_state = BQS1,
+             seen_status         = Seen,
+             confirmed           = [],
+             known_senders       = sets:from_list(KS),
+             wait_timeout        = WaitTimeout }.
+
+-spec sender_death_fun() -> death_fun().
+
+%% Returns a closure, run inside the queue process via
+%% rabbit_amqqueue:run_backing_queue/3, that tells the mirrors about a
+%% dead sender and forgets it locally.
+sender_death_fun() ->
+    Self = self(),
+    fun (DeadPid) ->
+            rabbit_amqqueue:run_backing_queue(
+              Self, ?MODULE,
+              fun (?MODULE, State = #state { gm = GM, known_senders = KS }) ->
+                      ok = gm:broadcast(GM, {sender_death, DeadPid}),
+                      KS1 = sets:del_element(DeadPid, KS),
+                      State #state { known_senders = KS1 }
+              end)
+    end.
+
+-spec depth_fun() -> depth_fun().
+
+%% Returns a closure that re-broadcasts the current queue depth to the
+%% mirrors; the queue state itself is unchanged.
+depth_fun() ->
+    Self = self(),
+    fun () ->
+            rabbit_amqqueue:run_backing_queue(
+              Self, ?MODULE,
+              fun (?MODULE, State = #state { gm                  = GM,
+                                             backing_queue       = BQ,
+                                             backing_queue_state = BQS }) ->
+                      ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}),
+                      State
+              end)
+    end.
+
+%% ---------------------------------------------------------------------------
+%% Helpers
+%% ---------------------------------------------------------------------------
+
+%% Tell the mirrors one message has gone from the head of the queue
+%% (after a successful fetch/2 or drop/2 at the master).
+drop_one(AckRequired, State = #state { gm                  = GM,
+                                       backing_queue       = BQ,
+                                       backing_queue_state = BQS }) ->
+    ok = gm:broadcast(GM, {drop, BQ:len(BQS), 1, AckRequired}),
+    State.
+
+%% Tell the mirrors how many messages dropwhile/fetchwhile removed,
+%% computed from the queue length before and after; nothing is sent
+%% when the length is unchanged.
+drop(PrevLen, AckRequired, State = #state { gm                  = GM,
+                                            backing_queue       = BQ,
+                                            backing_queue_state = BQS }) ->
+    Len = BQ:len(BQS),
+    case PrevLen - Len of
+        0       -> State;
+        Dropped -> ok = gm:broadcast(GM, {drop, Len, Dropped, AckRequired}),
+                   State
+    end.
+
+%% Ask the coordinator to monitor ChPid the first time we see it; the
+%% known_senders set makes this idempotent.
+ensure_monitoring(ChPid, State = #state { coordinator   = CPid,
+                                          known_senders = KS }) ->
+    case sets:is_element(ChPid, KS) of
+        true  -> State;
+        false -> ok = rabbit_mirror_queue_coordinator:ensure_monitoring(
+                        CPid, [ChPid]),
+                 State #state { known_senders = sets:add_element(ChPid, KS) }
+    end.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_misc.erl b/deps/rabbit/src/rabbit_mirror_queue_misc.erl
new file mode 100644
index 0000000000..02f590e2fb
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_misc.erl
@@ -0,0 +1,680 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_misc).
+-behaviour(rabbit_policy_validator).
+
+-export([remove_from_queue/3, on_vhost_up/1, add_mirrors/3,
+ report_deaths/4, store_updated_slaves/1,
+ initial_queue_node/2, suggested_queue_nodes/1, actual_queue_nodes/1,
+ is_mirrored/1, is_mirrored_ha_nodes/1,
+ update_mirrors/2, update_mirrors/1, validate_policy/1,
+ maybe_auto_sync/1, maybe_drop_master_after_sync/1,
+ sync_batch_size/1, log_info/3, log_warning/3]).
+-export([stop_all_slaves/5]).
+
+-export([sync_queue/1, cancel_sync_queue/1]).
+
+-export([transfer_leadership/2, queue_length/1, get_replicas/1]).
+
+%% for testing only
+-export([module/1]).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-define(HA_NODES_MODULE, rabbit_mirror_queue_mode_nodes).
+
+%% Register the ha-* policy validators with the registry at boot,
+%% before recovery, so that policies referencing them can be validated
+%% when recovered queues consult their policies.
+-rabbit_boot_step(
+   {?MODULE,
+    [{description, "HA policy validation"},
+     {mfa, {rabbit_registry, register,
+            [policy_validator, <<"ha-mode">>, ?MODULE]}},
+     {mfa, {rabbit_registry, register,
+            [policy_validator, <<"ha-params">>, ?MODULE]}},
+     {mfa, {rabbit_registry, register,
+            [policy_validator, <<"ha-sync-mode">>, ?MODULE]}},
+     {mfa, {rabbit_registry, register,
+            [policy_validator, <<"ha-sync-batch-size">>, ?MODULE]}},
+     {mfa, {rabbit_registry, register,
+            [policy_validator, <<"ha-promote-on-shutdown">>, ?MODULE]}},
+     {mfa, {rabbit_registry, register,
+            [policy_validator, <<"ha-promote-on-failure">>, ?MODULE]}},
+     {requires, rabbit_registry},
+     {enables, recovery}]}).
+
+
+%%----------------------------------------------------------------------------
+
+%% Returns {ok, NewMPid, DeadPids, ExtraNodes}
+
+-spec remove_from_queue
+        (rabbit_amqqueue:name(), pid(), [pid()]) ->
+            {'ok', pid(), [pid()], [node()]} | {'error', 'not_found'} |
+            {'error', {'not_synced', [pid()]}}.
+
+%% Remove the mirrors whose GM processes are in DeadGMPids from the
+%% queue record, possibly promoting a mirror to master, all inside a
+%% single mnesia transaction.
+remove_from_queue(QueueName, Self, DeadGMPids) ->
+    rabbit_misc:execute_mnesia_transaction(
+      fun () ->
+              %% Someone else could have deleted the queue before we
+              %% get here. Or, gm group could've altered. see rabbitmq-server#914
+              case mnesia:read({rabbit_queue, QueueName}) of
+                  [] -> {error, not_found};
+                  [Q0] when ?is_amqqueue(Q0) ->
+                      QPid = amqqueue:get_pid(Q0),
+                      SPids = amqqueue:get_slave_pids(Q0),
+                      SyncSPids = amqqueue:get_sync_slave_pids(Q0),
+                      GMPids = amqqueue:get_gm_pids(Q0),
+                      {DeadGM, AliveGM} = lists:partition(
+                                            fun ({GM, _}) ->
+                                                    lists:member(GM, DeadGMPids)
+                                            end, GMPids),
+                      DeadPids  = [Pid || {_GM, Pid} <- DeadGM],
+                      AlivePids = [Pid || {_GM, Pid} <- AliveGM],
+                      Alive = [Pid || Pid <- [QPid | SPids],
+                                      lists:member(Pid, AlivePids)],
+                      {QPid1, SPids1} = case Alive of
+                                            [] ->
+                                                %% GM altered, & if all pids are
+                                                %% perceived as dead, rather
+                                                %% do nothing here, & trust the
+                                                %% promoted mirror to have updated
+                                                %% mnesia during the alteration.
+                                                {QPid, SPids};
+                                            _  -> promote_slave(Alive)
+                                        end,
+                      DoNotPromote = SyncSPids =:= [] andalso
+                                     rabbit_policy:get(<<"ha-promote-on-failure">>, Q0) =:= <<"when-synced">>,
+                      case {{QPid, SPids}, {QPid1, SPids1}} of
+                          {Same, Same} ->
+                              {ok, QPid1, DeadPids, []};
+                          _ when QPid1 =/= QPid andalso QPid1 =:= Self andalso DoNotPromote =:= true ->
+                              %% We have been promoted to master
+                              %% but there are no synchronised mirrors
+                              %% hence this node is not synchronised either
+                              %% Bailing out.
+                              {error, {not_synced, SPids1}};
+                          _ when QPid =:= QPid1 orelse QPid1 =:= Self ->
+                              %% Either master hasn't changed, so
+                              %% we're ok to update mnesia; or we have
+                              %% become the master. If gm altered,
+                              %% we have no choice but to proceed.
+                              Q1 = amqqueue:set_pid(Q0, QPid1),
+                              Q2 = amqqueue:set_slave_pids(Q1, SPids1),
+                              Q3 = amqqueue:set_gm_pids(Q2, AliveGM),
+                              store_updated_slaves(Q3),
+                              %% If we add and remove nodes at the
+                              %% same time we might tell the old
+                              %% master we need to sync and then
+                              %% shut it down. So let's check if
+                              %% the new master needs to sync.
+                              maybe_auto_sync(Q3),
+                              {ok, QPid1, DeadPids, slaves_to_start_on_failure(Q3, DeadGMPids)};
+                          _ ->
+                              %% Master has changed, and we're not it.
+                              %% [1].
+                              Q1 = amqqueue:set_slave_pids(Q0, Alive),
+                              Q2 = amqqueue:set_gm_pids(Q1, AliveGM),
+                              store_updated_slaves(Q2),
+                              {ok, QPid1, DeadPids, []}
+                      end
+              end
+      end).
+%% [1] We still update mnesia here in case the mirror that is supposed
+%% to become master dies before it does do so, in which case the dead
+%% old master might otherwise never get removed, which in turn might
+%% prevent promotion of another mirror (e.g. us).
+%%
+%% Note however that we do not update the master pid. Otherwise we can
+%% have the situation where a mirror updates the mnesia record for a
+%% queue, promoting another mirror before that mirror realises it has
+%% become the new master, which is bad because it could then mean the
+%% mirror (now master) receives messages it's not ready for (for
+%% example, new consumers).
+%%
+%% We set slave_pids to Alive rather than SPids1 since otherwise we'd
+%% be removing the pid of the candidate master, which in turn would
+%% prevent it from promoting itself.
+%%
+%% We maintain gm_pids as our source of truth, i.e. it contains the
+%% most up-to-date information about which GMs and associated
+%% {M,S}Pids are alive. And all pids in slave_pids always have a
+%% corresponding entry in gm_pids. By contrast, due to the
+%% aforementioned restriction on updating the master pid, that pid may
+%% not be present in gm_pids, but only if said master has died.
+
+%% Sometimes a mirror dying means we need to start more on other
+%% nodes - "exactly" mode can cause this to happen.
+slaves_to_start_on_failure(Q, DeadGMPids) ->
+    %% In case Mnesia has not caught up yet, filter out nodes we know
+    %% to be dead.
+    ClusterNodes = rabbit_nodes:all_running() --
+        [node(P) || P <- DeadGMPids],
+    {_, OldNodes, _} = actual_queue_nodes(Q),
+    {_, NewNodes} = suggested_queue_nodes(Q, ClusterNodes),
+    %% Any newly-suggested node we don't already mirror on must be started.
+    NewNodes -- OldNodes.
+
+%% When a vhost comes (back) up on this node, work out which classic
+%% mirrored queues ought to have a mirror here and start those mirrors
+%% asynchronously.
+on_vhost_up(VHost) ->
+    QNames =
+        rabbit_misc:execute_mnesia_transaction(
+          fun () ->
+                  mnesia:foldl(
+                    fun
+                        (Q, QNames0) when not ?amqqueue_vhost_equals(Q, VHost) ->
+                            QNames0;
+                        (Q, QNames0) when ?amqqueue_is_classic(Q) ->
+                            QName = amqqueue:get_name(Q),
+                            Pid = amqqueue:get_pid(Q),
+                            SPids = amqqueue:get_slave_pids(Q),
+                            %% We don't want to pass in the whole
+                            %% cluster - we don't want a situation
+                            %% where starting one node causes us to
+                            %% decide to start a mirror on another
+                            PossibleNodes0 = [node(P) || P <- [Pid | SPids]],
+                            PossibleNodes =
+                                case lists:member(node(), PossibleNodes0) of
+                                    true  -> PossibleNodes0;
+                                    false -> [node() | PossibleNodes0]
+                                end,
+                            {_MNode, SNodes} = suggested_queue_nodes(
+                                                 Q, PossibleNodes),
+                            case lists:member(node(), SNodes) of
+                                true  -> [QName | QNames0];
+                                false -> QNames0
+                            end;
+                        (_, QNames0) ->
+                            QNames0
+                    end, [], rabbit_queue)
+          end),
+    [add_mirror(QName, node(), async) || QName <- QNames],
+    ok.
+
+%% Stop the mirror of QName on each of Nodes, in order (best effort:
+%% per-node failures are reflected in drop_mirror/2's return, which we
+%% deliberately ignore here).
+drop_mirrors(QName, Nodes) ->
+    lists:foreach(fun (Node) -> drop_mirror(QName, Node) end, Nodes).
+
+%% Stop the copy of QName running on MirrorNode by exiting its process
+%% with {shutdown, dropped}; refuses to drop the master when it is the
+%% only remaining copy.
+drop_mirror(QName, MirrorNode) ->
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} when ?is_amqqueue(Q) ->
+            Name = amqqueue:get_name(Q),
+            QPid = amqqueue:get_pid(Q),
+            SPids = amqqueue:get_slave_pids(Q),
+            case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of
+                [] ->
+                    {error, {queue_not_mirrored_on_node, MirrorNode}};
+                [QPid] when SPids =:= [] ->
+                    %% Dropping the master with no mirrors would lose the queue.
+                    {error, cannot_drop_only_mirror};
+                [Pid] ->
+                    log_info(Name, "Dropping queue mirror on node ~p~n",
+                             [MirrorNode]),
+                    exit(Pid, {shutdown, dropped}),
+                    {ok, dropped}
+            end;
+        {error, not_found} = E ->
+            E
+    end.
+
+-spec add_mirrors(rabbit_amqqueue:name(), [node()], 'sync' | 'async') ->
+          'ok'.
+
+%% Start a mirror of QName on each of Nodes, in order, using the given
+%% synchronisation mode for every new mirror.
+add_mirrors(QName, Nodes, SyncMode) ->
+    lists:foreach(fun (Node) -> add_mirror(QName, Node, SyncMode) end, Nodes).
+
+%% Start a single mirror of QName on MirrorNode. Start-up failures
+%% (missing vhost supervisor, dead queue supervisor) are logged rather
+%% than propagated: adding mirrors is best-effort.
+add_mirror(QName, MirrorNode, SyncMode) ->
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} ->
+            rabbit_misc:with_exit_handler(
+              rabbit_misc:const(ok),
+              fun () ->
+                      #resource{virtual_host = VHost} = amqqueue:get_name(Q),
+                      case rabbit_vhost_sup_sup:get_vhost_sup(VHost, MirrorNode) of
+                          {ok, _} ->
+                              try
+                                  SPid = rabbit_amqqueue_sup_sup:start_queue_process(
+                                           MirrorNode, Q, slave),
+                                  log_info(QName, "Adding mirror on node ~p: ~p~n",
+                                           [MirrorNode, SPid]),
+                                  rabbit_mirror_queue_slave:go(SPid, SyncMode)
+                              of
+                                  _ -> ok
+                              catch
+                                  error:QError ->
+                                      log_warning(QName,
+                                                  "Unable to start queue mirror on node '~p'. "
+                                                  "Target queue supervisor is not running: ~p~n",
+                                                  [MirrorNode, QError])
+                              end;
+                          {error, Error} ->
+                              log_warning(QName,
+                                          "Unable to start queue mirror on node '~p'. "
+                                          "Target virtual host is not running: ~p~n",
+                                          [MirrorNode, Error]),
+                              ok
+                      end
+              end);
+        {error, not_found} = E ->
+            E
+    end.
+
+%% Log which mirrors a queue process (master or mirror) saw die;
+%% silent when the death list is empty.
+report_deaths(_MirrorPid, _IsMaster, _QueueName, []) ->
+    ok;
+report_deaths(MirrorPid, IsMaster, QueueName, DeadPids) ->
+    log_info(QueueName, "~s ~s saw deaths of mirrors~s~n",
+             [case IsMaster of
+                  true  -> "Master";
+                  false -> "Slave"
+              end,
+              rabbit_misc:pid_to_string(MirrorPid),
+              [[$ , rabbit_misc:pid_to_string(P)] || P <- DeadPids]]).
+
+-spec log_info(rabbit_amqqueue:name(), string(), [any()]) -> 'ok'.
+
+%% Log an informational message about a mirrored queue, prefixed with
+%% the queue resource for context.
+%% (Fixed: stray space between function name and argument list,
+%% inconsistent with every other definition in this module.)
+log_info(QName, Fmt, Args) ->
+    rabbit_log_mirroring:info("Mirrored ~s: " ++ Fmt,
+                              [rabbit_misc:rs(QName) | Args]).
+
+-spec log_warning(rabbit_amqqueue:name(), string(), [any()]) -> 'ok'.
+
+%% Log a warning about a mirrored queue, prefixed with the queue
+%% resource for context.
+log_warning(QName, Fmt, Args) ->
+    rabbit_log_mirroring:warning("Mirrored ~s: " ++ Fmt,
+                                 [rabbit_misc:rs(QName) | Args]).
+
+-spec store_updated_slaves(amqqueue:amqqueue()) ->
+          amqqueue:amqqueue().
+
+%% Normalise and persist a queue record after its mirror set changed:
+%% drop sync-mirror pids that are no longer mirrors, refresh the
+%% recoverable-slaves list, mark the queue live and notify stats.
+store_updated_slaves(Q0) when ?is_amqqueue(Q0) ->
+    SPids = amqqueue:get_slave_pids(Q0),
+    SSPids = amqqueue:get_sync_slave_pids(Q0),
+    RS0 = amqqueue:get_recoverable_slaves(Q0),
+    %% TODO now that we clear sync_slave_pids in rabbit_durable_queue,
+    %% do we still need this filtering?
+    SSPids1 = [SSPid || SSPid <- SSPids, lists:member(SSPid, SPids)],
+    Q1 = amqqueue:set_sync_slave_pids(Q0, SSPids1),
+    RS1 = update_recoverable(SPids, RS0),
+    Q2 = amqqueue:set_recoverable_slaves(Q1, RS1),
+    Q3 = amqqueue:set_state(Q2, live),
+    %% amqqueue migration:
+    %% The amqqueue was read from this transaction, no need to handle
+    %% migration.
+    ok = rabbit_amqqueue:store_queue(Q3),
+    %% Wake it up so that we emit a stats event
+    rabbit_amqqueue:notify_policy_changed(Q3),
+    Q3.
+
+%% Recoverable nodes are those which we could promote if the whole
+%% cluster were to suddenly stop and we then lose the master; i.e. all
+%% nodes with running mirrors, and all stopped nodes which had running
+%% mirrors when they were up.
+%%
+%% Therefore we aim here to add new nodes with mirrors, and remove
+%% running nodes without mirrors. We also try to keep the order
+%% constant, and similar to the live SPids field (i.e. oldest
+%% first). That's not necessarily optimal if nodes spend a long time
+%% down, but we don't have a good way to predict what the optimal is
+%% in that case anyway, and we assume nodes will not just be down for
+%% a long time without being removed.
+update_recoverable(SPids, RS) ->
+    SNodes = [node(SPid) || SPid <- SPids],
+    RunningNodes = rabbit_nodes:all_running(),
+    AddNodes = SNodes -- RS,
+    DelNodes = RunningNodes -- SNodes, %% i.e. running with no slave
+    (RS -- DelNodes) ++ AddNodes.
+
+%% Terminate every mirror (plus the master's own GM) of a queue being
+%% deleted, wait up to WaitTimeout for each reachable mirror to go
+%% down, then manually clean the queue record up in mnesia and forget
+%% the GM group.
+stop_all_slaves(Reason, SPids, QName, GM, WaitTimeout) ->
+    PidsMRefs = [{Pid, erlang:monitor(process, Pid)} || Pid <- [GM | SPids]],
+    ok = gm:broadcast(GM, {delete_and_terminate, Reason}),
+    %% It's possible that we could be partitioned from some mirrors
+    %% between the lookup and the broadcast, in which case we could
+    %% monitor them but they would not have received the GM
+    %% message. So only wait for mirrors which are still
+    %% not-partitioned.
+    PendingSlavePids =
+        lists:foldl(
+          fun({Pid, MRef}, Acc) ->
+                  case rabbit_mnesia:on_running_node(Pid) of
+                      true ->
+                          receive
+                              {'DOWN', MRef, process, _Pid, _Info} ->
+                                  Acc
+                          after WaitTimeout ->
+                                  rabbit_mirror_queue_misc:log_warning(
+                                    QName, "Missing 'DOWN' message from ~p in"
+                                    " node ~p~n", [Pid, node(Pid)]),
+                                  [Pid | Acc]
+                          end;
+                      false ->
+                          Acc
+                  end
+          end, [], PidsMRefs),
+    %% Normally when we remove a mirror another mirror or master will
+    %% notice and update Mnesia. But we just removed them all, and
+    %% have stopped listening ourselves. So manually clean up.
+    rabbit_misc:execute_mnesia_transaction(
+      fun () ->
+              [Q0] = mnesia:read({rabbit_queue, QName}),
+              Q1 = amqqueue:set_gm_pids(Q0, []),
+              Q2 = amqqueue:set_slave_pids(Q1, []),
+              %% Restarted mirrors on running nodes can
+              %% ensure old incarnations are stopped using
+              %% the pending mirror pids.
+              Q3 = amqqueue:set_slave_pids_pending_shutdown(Q2, PendingSlavePids),
+              rabbit_mirror_queue_misc:store_updated_slaves(Q3)
+      end),
+    ok = gm:forget_group(QName).
+
+%%----------------------------------------------------------------------------
+
+%% The mirror pids are maintained in descending order of age, so
+%% the one to promote is the oldest.
+promote_slave([SPid | SPids]) ->
+    {SPid, SPids}.
+
+-spec initial_queue_node(amqqueue:amqqueue(), node()) -> node().
+
+%% Pick the node for a brand-new queue's master, defaulting to DefNode.
+initial_queue_node(Q, DefNode) ->
+    {MNode, _SNodes} = suggested_queue_nodes(Q, DefNode, rabbit_nodes:all_running()),
+    MNode.
+
+-spec suggested_queue_nodes(amqqueue:amqqueue()) ->
+          {node(), [node()]}.
+
+suggested_queue_nodes(Q) -> suggested_queue_nodes(Q, rabbit_nodes:all_running()).
+suggested_queue_nodes(Q, All) -> suggested_queue_nodes(Q, node(), All).
+
+%% The third argument exists so we can pull a call to
+%% rabbit_nodes:all_running() out of a loop or transaction
+%% or both.
+%% Ask the queue's mirroring-mode module where the master and mirrors
+%% should live, given the ha-params policy value and candidate nodes.
+%% Exclusive queues and non-mirrored queues get no mirrors.
+suggested_queue_nodes(Q, DefNode, All) when ?is_amqqueue(Q) ->
+    Owner = amqqueue:get_exclusive_owner(Q),
+    {MNode0, SNodes, SSNodes} = actual_queue_nodes(Q),
+    MNode = case MNode0 of
+                none -> DefNode;
+                _    -> MNode0
+            end,
+    case Owner of
+        none -> Params = policy(<<"ha-params">>, Q),
+                case module(Q) of
+                    {ok, M} -> M:suggested_queue_nodes(
+                                 Params, MNode, SNodes, SSNodes, All);
+                    _       -> {MNode, []}
+                end;
+        _    -> {MNode, []}
+    end.
+
+%% Look up a policy key on Q, mapping 'undefined' (key absent) to the
+%% sentinel 'none' used throughout this module.
+policy(PolicyKey, Q) ->
+    case rabbit_policy:get(PolicyKey, Q) of
+        undefined -> none;
+        Value     -> Value
+    end.
+
+%% Resolve the mirroring-mode module: from a queue's ha-mode policy,
+%% or directly from a mode name binary via the registry. Returns
+%% {ok, Module} or 'not_mirrored'.
+module(Q) when ?is_amqqueue(Q) ->
+    case rabbit_policy:get(<<"ha-mode">>, Q) of
+        undefined -> not_mirrored;
+        Mode      -> module(Mode)
+    end;
+
+module(Mode) when is_binary(Mode) ->
+    case rabbit_registry:binary_to_type(Mode) of
+        {error, not_found} -> not_mirrored;
+        T                  -> case rabbit_registry:lookup_module(ha_mode, T) of
+                                  {ok, Module} -> {ok, Module};
+                                  _            -> not_mirrored
+                              end
+    end.
+
+%% A mode is valid iff some registered module implements it.
+validate_mode(Mode) ->
+    case module(Mode) of
+        {ok, _Module} ->
+            ok;
+        not_mirrored ->
+            {error, "~p is not a valid ha-mode value", [Mode]}
+    end.
+
+-spec is_mirrored(amqqueue:amqqueue()) -> boolean().
+
+%% True iff the queue's policy selects any mirroring mode.
+is_mirrored(Q) ->
+    case module(Q) of
+        {ok, _} -> true;
+        _       -> false
+    end.
+
+%% True iff the queue uses the "nodes" mirroring mode specifically.
+is_mirrored_ha_nodes(Q) ->
+    case module(Q) of
+        {ok, ?HA_NODES_MODULE} -> true;
+        _                      -> false
+    end.
+
+%% Nodes the queue currently runs on, derived from the stored pids:
+%% {MasterNode | 'none', MirrorNodes, SyncedMirrorNodes}.
+actual_queue_nodes(Q) when ?is_amqqueue(Q) ->
+    MPid = amqqueue:get_pid(Q),
+    SPids = amqqueue:get_slave_pids(Q),
+    SSPids = amqqueue:get_sync_slave_pids(Q),
+    Nodes = fun (L) -> [node(Pid) || Pid <- L] end,
+    {case MPid of
+         none -> none;
+         _    -> node(MPid)
+     end, Nodes(SPids), Nodes(SSPids)}.
+
+-spec maybe_auto_sync(amqqueue:amqqueue()) -> 'ok'.
+
+%% Kick off mirror synchronisation in a throwaway process (so the
+%% caller never blocks) when the policy asks for automatic sync.
+maybe_auto_sync(Q) when ?is_amqqueue(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    case policy(<<"ha-sync-mode">>, Q) of
+        <<"automatic">> ->
+            spawn(fun() -> rabbit_amqqueue:sync_mirrors(QPid) end);
+        _ ->
+            ok
+    end.
+
+%% Explicitly trigger mirror synchronisation for a queue; only classic
+%% queues support this.
+sync_queue(Q0) ->
+    F = fun
+            (Q) when ?amqqueue_is_classic(Q) ->
+                QPid = amqqueue:get_pid(Q),
+                rabbit_amqqueue:sync_mirrors(QPid);
+            (Q) when ?amqqueue_is_quorum(Q) ->
+                {error, quorum_queue_not_supported}
+        end,
+    rabbit_amqqueue:with(Q0, F).
+
+%% Cancel an in-progress mirror synchronisation (classic queues only).
+cancel_sync_queue(Q0) ->
+    F = fun
+            (Q) when ?amqqueue_is_classic(Q) ->
+                QPid = amqqueue:get_pid(Q),
+                rabbit_amqqueue:cancel_sync_mirrors(QPid);
+            (Q) when ?amqqueue_is_quorum(Q) ->
+                {error, quorum_queue_not_supported}
+        end,
+    rabbit_amqqueue:with(Q0, F).
+
+%% Batch size used when syncing mirrors: from the ha-sync-batch-size
+%% policy when it is a sensible integer, otherwise the app-env default.
+sync_batch_size(Q) when ?is_amqqueue(Q) ->
+    case policy(<<"ha-sync-batch-size">>, Q) of
+        none -> %% we need this case because none > 1 == true
+            default_batch_size();
+        BatchSize when BatchSize > 1 ->
+            BatchSize;
+        _ ->
+            default_batch_size()
+    end.
+
+-define(DEFAULT_BATCH_SIZE, 4096).
+
+%% App-env override for the sync batch size, else ?DEFAULT_BATCH_SIZE.
+default_batch_size() ->
+    rabbit_misc:get_env(rabbit, mirroring_sync_batch_size,
+                        ?DEFAULT_BATCH_SIZE).
+
+-spec update_mirrors
+        (amqqueue:amqqueue(), amqqueue:amqqueue()) -> 'ok'.
+
+%% Policy changed for a queue whose pid is unchanged: if mirroring is
+%% (or was) in effect, ask the queue process to re-evaluate it.
+update_mirrors(OldQ, NewQ) when ?amqqueue_pids_are_equal(OldQ, NewQ) ->
+    %% Note: we do want to ensure both queues have same pid
+    QPid = amqqueue:get_pid(OldQ),
+    QPid = amqqueue:get_pid(NewQ),
+    case {is_mirrored(OldQ), is_mirrored(NewQ)} of
+        {false, false} -> ok;
+        _ -> rabbit_amqqueue:update_mirroring(QPid)
+    end.
+
+-spec update_mirrors
+        (amqqueue:amqqueue()) -> 'ok'.
+
+%% Re-evaluate where Q's mirrors should live and add/drop mirrors so
+%% reality matches the policy, then trigger auto-sync if required.
+%% (Fixed: stray space between `add_mirrors` and its argument list,
+%% inconsistent with the rest of the module.)
+update_mirrors(Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    {OldMNode, OldSNodes, _} = actual_queue_nodes(Q),
+    {NewMNode, NewSNodes} = suggested_queue_nodes(Q),
+    OldNodes = [OldMNode | OldSNodes],
+    NewNodes = [NewMNode | NewSNodes],
+    %% When a mirror dies, remove_from_queue/2 might have to add new
+    %% mirrors (in "exactly" mode). It will check mnesia to see which
+    %% mirrors there currently are. If drop_mirror/2 is invoked first
+    %% then when we end up in remove_from_queue/2 it will not see the
+    %% mirrors that add_mirror/2 will add, and also want to add them
+    %% (even though we are not responding to the death of a
+    %% mirror). Breakage ensues. Hence: add before dropping.
+    add_mirrors(QName, NewNodes -- OldNodes, async),
+    drop_mirrors(QName, OldNodes -- NewNodes),
+    %% This is for the case where no extra nodes were added but we changed to
+    %% a policy requiring auto-sync.
+    maybe_auto_sync(Q),
+    ok.
+
+%% Current number of messages in Q, read via the 'messages' info item.
+queue_length(Q) ->
+    [{messages, Len}] = rabbit_amqqueue:info(Q, [messages]),
+    Len.
+
+%% All nodes that should hold a replica of Q: the suggested master
+%% first, followed by the suggested mirrors.
+%% (Fixed: `[MNode] ++ SNodes` built a singleton list only to append;
+%% a cons cell is the idiomatic equivalent.)
+get_replicas(Q) ->
+    {MNode, SNodes} = suggested_queue_nodes(Q),
+    [MNode | SNodes].
+
+%% Move Q's master to Destination: add a mirror there if absent, drop
+%% every other copy, wait for the promotion, then re-apply the policy
+%% so the mirror set is rebuilt around the new master.
+transfer_leadership(Q, Destination) ->
+    QName = amqqueue:get_name(Q),
+    {OldMNode, OldSNodes, _} = actual_queue_nodes(Q),
+    OldNodes = [OldMNode | OldSNodes],
+    add_mirrors(QName, [Destination] -- OldNodes, async),
+    drop_mirrors(QName, OldNodes -- [Destination]),
+    {Result, NewQ} = wait_for_new_master(QName, Destination),
+    update_mirrors(NewQ),
+    Result.
+
+%% Poll (up to 100 attempts, 100ms apart) until the queue's master pid
+%% lives on Destination; gives up with {not_migrated, ""} when the
+%% attempts run out.
+wait_for_new_master(QName, Destination) ->
+    wait_for_new_master(QName, Destination, 100).
+
+wait_for_new_master(QName, _, 0) ->
+    {ok, Q} = rabbit_amqqueue:lookup(QName),
+    {{not_migrated, ""}, Q};
+wait_for_new_master(QName, Destination, N) ->
+    {ok, Q} = rabbit_amqqueue:lookup(QName),
+    case amqqueue:get_pid(Q) of
+        none ->
+            %% No master at all yet (promotion in progress) - retry.
+            timer:sleep(100),
+            wait_for_new_master(QName, Destination, N - 1);
+        Pid ->
+            case node(Pid) of
+                Destination ->
+                    {{migrated, Destination}, Q};
+                _ ->
+                    timer:sleep(100),
+                    wait_for_new_master(QName, Destination, N - 1)
+            end
+    end.
+
+%% The arrival of a newly synced mirror may cause the master to die if
+%% the policy does not want the master but it has been kept alive
+%% because there were no synced mirrors.
+%%
+%% We don't just call update_mirrors/2 here since that could decide to
+%% start a mirror for some other reason, and since we are the mirror ATM
+%% that allows complicated deadlocks.
+
+-spec maybe_drop_master_after_sync(amqqueue:amqqueue()) -> 'ok'.
+
+maybe_drop_master_after_sync(Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    MPid = amqqueue:get_pid(Q),
+    {DesiredMNode, DesiredSNodes} = suggested_queue_nodes(Q),
+    case node(MPid) of
+        DesiredMNode -> ok;
+        OldMNode     -> false = lists:member(OldMNode, DesiredSNodes), %% [0]
+                        drop_mirror(QName, OldMNode)
+    end,
+    ok.
+%% [0] ASSERTION - if the policy wants the master to change, it has
+%% not just shuffled it into the mirrors. All our modes ensure this
+%% does not happen, but we should guard against a misbehaving plugin.
+
+%%----------------------------------------------------------------------------
+
+%% rabbit_policy_validator callback: validate the ha-* keys of a
+%% policy as a group. ha-mode is mandatory once any other ha-* key is
+%% present; each value is then checked by its dedicated validator.
+validate_policy(KeyList) ->
+    Mode = proplists:get_value(<<"ha-mode">>, KeyList, none),
+    Params = proplists:get_value(<<"ha-params">>, KeyList, none),
+    SyncMode = proplists:get_value(<<"ha-sync-mode">>, KeyList, none),
+    SyncBatchSize = proplists:get_value(
+                      <<"ha-sync-batch-size">>, KeyList, none),
+    PromoteOnShutdown = proplists:get_value(
+                          <<"ha-promote-on-shutdown">>, KeyList, none),
+    PromoteOnFailure = proplists:get_value(
+                         <<"ha-promote-on-failure">>, KeyList, none),
+    case {Mode, Params, SyncMode, SyncBatchSize, PromoteOnShutdown, PromoteOnFailure} of
+        {none, none, none, none, none, none} ->
+            ok;
+        {none, _, _, _, _, _} ->
+            {error, "ha-mode must be specified to specify ha-params, "
+             "ha-sync-mode or ha-promote-on-shutdown", []};
+        _ ->
+            validate_policies(
+              [{Mode, fun validate_mode/1},
+               {Params, ha_params_validator(Mode)},
+               {SyncMode, fun validate_sync_mode/1},
+               {SyncBatchSize, fun validate_sync_batch_size/1},
+               {PromoteOnShutdown, fun validate_pos/1},
+               {PromoteOnFailure, fun validate_pof/1}])
+    end.
+
+%% Build the ha-params validator fun by dispatching to the mode
+%% module; only invoked after ha-mode itself has validated OK, hence
+%% the assertive {ok, M} match.
+ha_params_validator(Mode) ->
+    fun(Val) ->
+            {ok, M} = module(Mode),
+            M:validate_policy(Val)
+    end.
+
+%% Run each {Value, Validator} pair in order, stopping at the first
+%% error.
+validate_policies([]) ->
+    ok;
+validate_policies([{Val, Validator} | Rest]) ->
+    case Validator(Val) of
+        ok -> validate_policies(Rest);
+        E  -> E
+    end.
+
+%% Validate ha-sync-mode ('none' means the key was absent).
+validate_sync_mode(SyncMode) ->
+    case SyncMode of
+        <<"automatic">> -> ok;
+        <<"manual">>    -> ok;
+        none            -> ok;
+        Mode            -> {error, "ha-sync-mode must be \"manual\" "
+                            "or \"automatic\", got ~p", [Mode]}
+    end.
+
+%% Validate ha-sync-batch-size: absent, or a positive integer.
+validate_sync_batch_size(none) ->
+    ok;
+validate_sync_batch_size(N) when is_integer(N) andalso N > 0 ->
+    ok;
+validate_sync_batch_size(N) ->
+    {error, "ha-sync-batch-size takes an integer greater than 0, "
+     "~p given", [N]}.
+
+%% Validate ha-promote-on-shutdown ('none' means the key was absent).
+validate_pos(PromoteOnShutdown) ->
+    case PromoteOnShutdown of
+        <<"always">>      -> ok;
+        <<"when-synced">> -> ok;
+        none              -> ok;
+        Mode              -> {error, "ha-promote-on-shutdown must be "
+                              "\"always\" or \"when-synced\", got ~p", [Mode]}
+    end.
+
+%% Validate ha-promote-on-failure ('none' means the key was absent).
+%% (Fixed: the argument was misleadingly named PromoteOnShutdown, a
+%% copy-paste leftover from validate_pos/1.)
+validate_pof(PromoteOnFailure) ->
+    case PromoteOnFailure of
+        <<"always">>      -> ok;
+        <<"when-synced">> -> ok;
+        none              -> ok;
+        Mode              -> {error, "ha-promote-on-failure must be "
+                              "\"always\" or \"when-synced\", got ~p", [Mode]}
+    end.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode.erl b/deps/rabbit/src/rabbit_mirror_queue_mode.erl
new file mode 100644
index 0000000000..91491efc49
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_mode.erl
@@ -0,0 +1,42 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Behaviour for classic-mirrored-queue "ha-mode" strategy modules
%% (see rabbit_mirror_queue_mode_all / _exactly / _nodes).  Strategy
%% modules are registered with, and looked up through, rabbit_registry,
%% hence the rabbit_registry_class behaviour implemented below.
-module(rabbit_mirror_queue_mode).

-behaviour(rabbit_registry_class).

-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).

%% Node currently hosting the queue master.
-type master() :: node().
%% Node hosting (or eligible to host) a mirror.
-type slave() :: node().
%% Mode-specific policy parameters; each mode module defines the shape.
-type params() :: any().

%% Human-readable description of the mode, as a proplist.
-callback description() -> [proplists:property()].

%% Called whenever we think we might need to change nodes for a
%% mirrored queue. Note that this is called from a variety of
%% contexts, both inside and outside Mnesia transactions. Ideally it
%% will be pure-functional.
%%
%% Takes: parameters set in the policy,
%%        current master,
%%        current mirrors,
%%        current synchronised mirrors,
%%        all nodes to consider
%%
%% Returns: tuple of new master, new mirrors
%%
-callback suggested_queue_nodes(
    params(), master(), [slave()], [slave()], [node()]) ->
    {master(), [slave()]}.

%% Are the parameters valid for this mode?
-callback validate_policy(params()) ->
    rabbit_policy_validator:validate_results().

%% rabbit_registry_class callbacks: nothing to do on (de)registration.
added_to_rabbit_registry(_Type, _ModuleName) -> ok.
removed_from_rabbit_registry(_Type) -> ok.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl b/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl
new file mode 100644
index 0000000000..2da12a5972
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl
@@ -0,0 +1,32 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_mode_all).
+
+-include("rabbit.hrl").
+
+-behaviour(rabbit_mirror_queue_mode).
+
+-export([description/0, suggested_queue_nodes/5, validate_policy/1]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "mirror mode all"},
+ {mfa, {rabbit_registry, register,
+ [ha_mode, <<"all">>, ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+description() ->
+ [{description, <<"Mirror queue to all nodes">>}].
+
+suggested_queue_nodes(_Params, MNode, _SNodes, _SSNodes, Poss) ->
+ {MNode, Poss -- [MNode]}.
+
+validate_policy(none) ->
+ ok;
+validate_policy(_Params) ->
+ {error, "ha-mode=\"all\" does not take parameters", []}.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl b/deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl
new file mode 100644
index 0000000000..a8aa7546ac
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl
@@ -0,0 +1,45 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_mode_exactly).
+
+-include("rabbit.hrl").
+
+-behaviour(rabbit_mirror_queue_mode).
+
+-export([description/0, suggested_queue_nodes/5, validate_policy/1]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "mirror mode exactly"},
+ {mfa, {rabbit_registry, register,
+ [ha_mode, <<"exactly">>, ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+description() ->
+ [{description, <<"Mirror queue to a specified number of nodes">>}].
+
+%% When we need to add nodes, we randomise our candidate list as a
+%% crude form of load-balancing. TODO it would also be nice to
+%% randomise the list of ones to remove when we have too many - we
+%% would have to take account of synchronisation though.
+suggested_queue_nodes(Count, MNode, SNodes, _SSNodes, Poss) ->
+ SCount = Count - 1,
+ {MNode, case SCount > length(SNodes) of
+ true -> Cand = shuffle((Poss -- [MNode]) -- SNodes),
+ SNodes ++ lists:sublist(Cand, SCount - length(SNodes));
+ false -> lists:sublist(SNodes, SCount)
+ end}.
+
+shuffle(L) ->
+ {_, L1} = lists:unzip(lists:keysort(1, [{rand:uniform(), N} || N <- L])),
+ L1.
+
+validate_policy(N) when is_integer(N) andalso N > 0 ->
+ ok;
+validate_policy(Params) ->
+ {error, "ha-mode=\"exactly\" takes an integer, ~p given", [Params]}.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl b/deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl
new file mode 100644
index 0000000000..f3e134ba63
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl
@@ -0,0 +1,69 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_mode_nodes).
+
+-include("rabbit.hrl").
+
+-behaviour(rabbit_mirror_queue_mode).
+
+-export([description/0, suggested_queue_nodes/5, validate_policy/1]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "mirror mode nodes"},
+ {mfa, {rabbit_registry, register,
+ [ha_mode, <<"nodes">>, ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+description() ->
+ [{description, <<"Mirror queue to specified nodes">>}].
+
+suggested_queue_nodes(PolicyNodes0, CurrentMaster, _SNodes, SSNodes, NodesRunningRabbitMQ) ->
+ PolicyNodes1 = [list_to_atom(binary_to_list(Node)) || Node <- PolicyNodes0],
+ %% If the current master is not in the nodes specified, then what we want
+ %% to do depends on whether there are any synchronised mirrors. If there
+ %% are then we can just kill the current master - the admin has asked for
+ %% a migration and we should give it to them. If there are not however
+ %% then we must keep the master around so as not to lose messages.
+
+ PolicyNodes = case SSNodes of
+ [] -> lists:usort([CurrentMaster | PolicyNodes1]);
+ _ -> PolicyNodes1
+ end,
+ Unavailable = PolicyNodes -- NodesRunningRabbitMQ,
+ AvailablePolicyNodes = PolicyNodes -- Unavailable,
+ case AvailablePolicyNodes of
+ [] -> %% We have never heard of anything? Not much we can do but
+ %% keep the master alive.
+ {CurrentMaster, []};
+ _ -> case lists:member(CurrentMaster, AvailablePolicyNodes) of
+ true -> {CurrentMaster,
+ AvailablePolicyNodes -- [CurrentMaster]};
+ false -> %% Make sure the new master is synced! In order to
+ %% get here SSNodes must not be empty.
+ SyncPolicyNodes = [Node ||
+ Node <- AvailablePolicyNodes,
+ lists:member(Node, SSNodes)],
+ NewMaster = case SyncPolicyNodes of
+ [Node | _] -> Node;
+ [] -> erlang:hd(SSNodes)
+ end,
+ {NewMaster, AvailablePolicyNodes -- [NewMaster]}
+ end
+ end.
+
+validate_policy([]) ->
+ {error, "ha-mode=\"nodes\" list must be non-empty", []};
+validate_policy(Nodes) when is_list(Nodes) ->
+ case [I || I <- Nodes, not is_binary(I)] of
+ [] -> ok;
+ Invalid -> {error, "ha-mode=\"nodes\" takes a list of strings, "
+ "~p was not a string", [Invalid]}
+ end;
+validate_policy(Params) ->
+ {error, "ha-mode=\"nodes\" takes a list, ~p given", [Params]}.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_slave.erl b/deps/rabbit/src/rabbit_mirror_queue_slave.erl
new file mode 100644
index 0000000000..0480db9cfe
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_slave.erl
@@ -0,0 +1,1093 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_slave).
+
+%% For general documentation of HA design, see
+%% rabbit_mirror_queue_coordinator
+%%
+%% We receive messages from GM and from publishers, and the gm
+%% messages can arrive either before or after the 'actual' message.
+%% All instructions from the GM group must be processed in the order
+%% in which they're received.
+
+-export([set_maximum_since_use/2, info/1, go/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3, handle_pre_hibernate/1, prioritise_call/4,
+ prioritise_cast/3, prioritise_info/3, format_message_queue/2]).
+
+-export([joined/2, members_changed/3, handle_msg/3, handle_terminate/2]).
+
+-behaviour(gen_server2).
+-behaviour(gm).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-include("amqqueue.hrl").
+-include("gm_specs.hrl").
+
+%%----------------------------------------------------------------------------
+
%% Keys answered by info/1 (see i/2 for each key's value).
-define(INFO_KEYS,
        [pid,
         name,
         master_pid,
         is_synchronised
        ]).

-define(SYNC_INTERVAL, 25). %% milliseconds
-define(RAM_DURATION_UPDATE_INTERVAL, 5000).
-define(DEATH_TIMEOUT, 20000). %% 20 seconds

%% Mirror (slave) process state.
-record(state, { q,                    %% the amqqueue record for this queue
                 gm,                   %% pid of our gm group member
                 backing_queue,        %% backing queue module (BQ)
                 backing_queue_state,  %% opaque BQ state (BQS)
                 sync_timer_ref,       %% timer driving BQ 'sync_timeout'
                 rate_timer_ref,       %% timer driving 'update_ram_duration'

                 sender_queues, %% :: Pid -> {Q Msg, Set MsgId, ChState}
                 msg_id_ack, %% :: MsgId -> AckTag

                 %% MsgId -> published | {published, ChPid, MsgSeqNo} |
                 %% confirmed | discarded (see promote_me/2 for details)
                 msg_id_status,
                 %% pmon of channel (sender) pids we monitor
                 known_senders,

                 %% Master depth - local depth
                 depth_delta
               }).
+
+%%----------------------------------------------------------------------------
+
%% file_handle_cache callback: ask the mirror to age out file handles.
set_maximum_since_use(QPid, Age) ->
    gen_server2:cast(QPid, {set_maximum_since_use, Age}).

%% Synchronous info query; returns one {Key, Value} per ?INFO_KEYS entry.
info(QPid) -> gen_server2:call(QPid, info, infinity).

%% gen_server2 init: start dormant as {not_started, Q}.  The real
%% start-up (GM join, mnesia registration, BQ init) is deferred to
%% handle_go/1 and triggered via go/2.
init(Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    ?store_proc_name(QName),
    {ok, {not_started, Q}, hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
      ?DESIRED_HIBERNATE}, ?MODULE}.

%% Kick a dormant mirror into life; 'sync' waits for handle_go/1 to
%% complete, 'async' does not.
go(SPid, sync)  -> gen_server2:call(SPid, go, infinity);
go(SPid, async) -> gen_server2:cast(SPid, go).
+
%% Real start-up of the mirror: join the GM group, register ourselves
%% in the queue's mnesia record via init_it/4, then initialise the
%% backing queue and ask the master for its depth.
handle_go(Q0) when ?is_amqqueue(Q0) ->
    QName = amqqueue:get_name(Q0),
    %% We join the GM group before we add ourselves to the amqqueue
    %% record. As a result:
    %% 1. We can receive msgs from GM that correspond to messages we will
    %%    never receive from publishers.
    %% 2. When we receive a message from publishers, we must receive a
    %%    message from the GM group for it.
    %% 3. However, that instruction from the GM group can arrive either
    %%    before or after the actual message. We need to be able to
    %%    distinguish between GM instructions arriving early, and case (1)
    %%    above.
    %%
    process_flag(trap_exit, true), %% amqqueue_process traps exits too.
    {ok, GM} = gm:start_link(QName, ?MODULE, [self()],
                             fun rabbit_misc:execute_mnesia_transaction/1),
    MRef = erlang:monitor(process, GM),
    %% We ignore the DOWN message because we are also linked and
    %% trapping exits, we just want to not get stuck and we will exit
    %% later.
    receive
        {joined, GM}            -> erlang:demonitor(MRef, [flush]),
                                   ok;
        {'DOWN', MRef, _, _, _} -> ok
    end,
    Self = self(),
    Node = node(),
    case rabbit_misc:execute_mnesia_transaction(
           fun() -> init_it(Self, GM, Node, QName) end) of
        {new, QPid, GMPids} ->
            %% We are now a registered mirror: hook into the file
            %% handle cache and memory monitor, then build BQ state.
            ok = file_handle_cache:register_callback(
                   rabbit_amqqueue, set_maximum_since_use, [Self]),
            ok = rabbit_memory_monitor:register(
                   Self, {rabbit_amqqueue, set_ram_duration_target, [Self]}),
            {ok, BQ} = application:get_env(backing_queue_module),
            Q1 = amqqueue:set_pid(Q0, QPid),
            _ = BQ:delete_crashed(Q1), %% For crash recovery
            BQS = bq_init(BQ, Q1, new),
            State = #state { q                   = Q1,
                             gm                  = GM,
                             backing_queue       = BQ,
                             backing_queue_state = BQS,
                             rate_timer_ref      = undefined,
                             sync_timer_ref      = undefined,

                             sender_queues       = #{},
                             msg_id_ack          = #{},

                             msg_id_status       = #{},
                             known_senders       = pmon:new(delegate),

                             depth_delta         = undefined
                           },
            %% Ask the master for its depth so we can compute depth_delta,
            %% and check the GM membership is what mnesia says it is.
            ok = gm:broadcast(GM, request_depth),
            ok = gm:validate_members(GM, [GM | [G || {G, _} <- GMPids]]),
            rabbit_mirror_queue_misc:maybe_auto_sync(Q1),
            {ok, State};
        {stale, StalePid} ->
            rabbit_mirror_queue_misc:log_warning(
              QName, "Detected stale HA master: ~p~n", [StalePid]),
            gm:leave(GM),
            {error, {stale_master_pid, StalePid}};
        duplicate_live_master ->
            gm:leave(GM),
            {error, {duplicate_live_master, Node}};
        existing ->
            %% A live mirror already runs on this node; bow out quietly.
            gm:leave(GM),
            {error, normal};
        master_in_recovery ->
            gm:leave(GM),
            %% The queue record vanished - we must have a master starting
            %% concurrently with us. In that case we can safely decide to do
            %% nothing here, and the master will start us in
            %% master:init_with_existing_bq/3
            {error, normal}
    end.
+
%% Runs inside an mnesia transaction.  Decide how this process relates
%% to the queue record on this node:
%%   - no local pid in the record        -> register ourselves ({new, ...})
%%   - we'd duplicate a live master      -> duplicate_live_master
%%   - record's local master is dead     -> {stale, QPid}
%%   - a live local mirror already exists-> existing
%%   - a dead local mirror entry exists  -> scrub it, register ({new, ...})
%%   - queue record missing              -> master_in_recovery
init_it(Self, GM, Node, QName) ->
    case mnesia:read({rabbit_queue, QName}) of
        [Q] when ?is_amqqueue(Q) ->
            QPid = amqqueue:get_pid(Q),
            SPids = amqqueue:get_slave_pids(Q),
            GMPids = amqqueue:get_gm_pids(Q),
            PSPids = amqqueue:get_slave_pids_pending_shutdown(Q),
            %% Any master/mirror pid already recorded for this node?
            case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of
                []     -> %% Clear out any stuck pending mirrors first.
                          stop_pending_slaves(QName, PSPids),
                          add_slave(Q, Self, GM),
                          {new, QPid, GMPids};
                [QPid] -> case rabbit_mnesia:is_process_alive(QPid) of
                              true  -> duplicate_live_master;
                              false -> {stale, QPid}
                          end;
                [SPid] -> case rabbit_mnesia:is_process_alive(SPid) of
                              true  -> existing;
                              false -> %% Dead local mirror: remove its
                                       %% slave and gm entries, then add us.
                                       GMPids1 = [T || T = {_, S} <- GMPids,
                                                       S =/= SPid],
                                       SPids1 = SPids -- [SPid],
                                       Q1 = amqqueue:set_slave_pids(Q, SPids1),
                                       Q2 = amqqueue:set_gm_pids(Q1, GMPids1),
                                       add_slave(Q2, Self, GM),
                                       {new, QPid, GMPids1}
                          end
            end;
        [] ->
            master_in_recovery
    end.
+
%% Pending mirrors have been asked to stop by the master, but despite the node
%% being up these did not answer on the expected timeout. Stop local mirrors now.
%% Only pids local to this node and still alive are touched.
stop_pending_slaves(QName, Pids) ->
    [begin
         rabbit_mirror_queue_misc:log_warning(
           QName, "Detected a non-responsive classic queue mirror, stopping it: ~p~n", [Pid]),
         case erlang:process_info(Pid, dictionary) of
             undefined -> ok;
             {dictionary, Dict} ->
                 Vhost = QName#resource.virtual_host,
                 {ok, AmqQSup} = rabbit_amqqueue_sup_sup:find_for_vhost(Vhost),
                 %% Only kill processes that really are queue processes:
                 %% their '$ancestors' chain must lead to this vhost's
                 %% queue supervisor.  Kill the per-queue supervisor too
                 %% so it does not restart the mirror.
                 case proplists:get_value('$ancestors', Dict) of
                     [Sup, AmqQSup | _] ->
                         exit(Sup, kill),
                         exit(Pid, kill);
                     _ ->
                         ok
                 end
         end
     end || Pid <- Pids, node(Pid) =:= node(),
            true =:= erlang:is_process_alive(Pid)].
+
%% Add to the end, so they are in descending order of age, see
%% rabbit_mirror_queue_misc:promote_slave/1
add_slave(Q0, New, GM) when ?is_amqqueue(Q0) ->
    SPids = amqqueue:get_slave_pids(Q0),
    GMPids = amqqueue:get_gm_pids(Q0),
    SPids1 = SPids ++ [New],
    %% gm pids, by contrast, are prepended ({GmPid, SlavePid} pairs).
    GMPids1 = [{GM, New} | GMPids],
    Q1 = amqqueue:set_slave_pids(Q0, SPids1),
    Q2 = amqqueue:set_gm_pids(Q1, GMPids1),
    rabbit_mirror_queue_misc:store_updated_slaves(Q2).
+
%% Synchronous 'go' (see go/2): perform deferred start-up.
handle_call(go, _From, {not_started, Q} = NotStarted) ->
    case handle_go(Q) of
        {ok, State}    -> {reply, ok, State};
        {error, Error} -> {stop, Error, NotStarted}
    end;

%% GM members have died: work out (via mnesia) who the master is now.
%% Three outcomes: master unchanged, we are promoted, or someone else is.
handle_call({gm_deaths, DeadGMPids}, From,
            State = #state{ gm = GM, q = Q,
                            backing_queue = BQ,
                            backing_queue_state = BQS}) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    MPid = amqqueue:get_pid(Q),
    Self = self(),
    case rabbit_mirror_queue_misc:remove_from_queue(QName, Self, DeadGMPids) of
        {error, not_found} ->
            %% Queue is gone; reply so the gm caller is not stuck, then stop.
            gen_server2:reply(From, ok),
            {stop, normal, State};
        {error, {not_synced, _SPids}} ->
            BQ:delete_and_terminate({error, not_synced}, BQS),
            {stop, normal, State#state{backing_queue_state = undefined}};
        {ok, Pid, DeadPids, ExtraNodes} ->
            rabbit_mirror_queue_misc:report_deaths(Self, false, QName,
                                                   DeadPids),
            case Pid of
                MPid ->
                    %% master hasn't changed
                    gen_server2:reply(From, ok),
                    rabbit_mirror_queue_misc:add_mirrors(
                      QName, ExtraNodes, async),
                    noreply(State);
                Self ->
                    %% we've become master
                    QueueState = promote_me(From, State),
                    rabbit_mirror_queue_misc:add_mirrors(
                      QName, ExtraNodes, async),
                    {become, rabbit_amqqueue_process, QueueState, hibernate};
                _ ->
                    %% master has changed to not us
                    gen_server2:reply(From, ok),
                    %% see rabbitmq-server#914;
                    %% It's not always guaranteed that we won't have ExtraNodes.
                    %% If gm alters, master can change to not us with extra nodes,
                    %% in which case we attempt to add mirrors on those nodes.
                    case ExtraNodes of
                        [] -> void;
                        _  -> rabbit_mirror_queue_misc:add_mirrors(
                                QName, ExtraNodes, async)
                    end,
                    %% Since GM is by nature lazy we need to make sure
                    %% there is some traffic when a master dies, to
                    %% make sure all mirrors get informed of the
                    %% death. That is all process_death does, create
                    %% some traffic.
                    ok = gm:broadcast(GM, process_death),
                    Q1 = amqqueue:set_pid(Q, Pid),
                    State1 = State#state{q = Q1},
                    noreply(State1)
            end
    end;

handle_call(info, _From, State) ->
    reply(infos(?INFO_KEYS, State), State).
+
%% Asynchronous 'go' (see go/2): perform deferred start-up.
handle_cast(go, {not_started, Q} = NotStarted) ->
    case handle_go(Q) of
        {ok, State}    -> {noreply, State};
        {error, Error} -> {stop, Error, NotStarted}
    end;

handle_cast({run_backing_queue, Mod, Fun}, State) ->
    noreply(run_backing_queue(Mod, Fun, State));

%% Instruction from the GM group.  Only process it if mnesia still lists
%% us as a mirror of this queue; otherwise we are a stray duplicate.
handle_cast({gm, Instruction}, State = #state{q = Q0}) when ?is_amqqueue(Q0) ->
    QName = amqqueue:get_name(Q0),
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q1} when ?is_amqqueue(Q1) ->
            SPids = amqqueue:get_slave_pids(Q1),
            case lists:member(self(), SPids) of
                true ->
                    handle_process_result(process_instruction(Instruction, State));
                false ->
                    %% Potentially a duplicated mirror caused by a partial partition,
                    %% will stop as a new mirror could start unaware of our presence
                    {stop, shutdown, State}
            end;
        {error, not_found} ->
            %% Would not expect this to happen after fixing #953
            {stop, shutdown, State}
    end;

handle_cast({deliver, Delivery = #delivery{sender = Sender, flow = Flow}, true},
            State) ->
    %% Asynchronous, non-"mandatory", deliver mode.
    %% We are acking messages to the channel process that sent us
    %% the message delivery. See
    %% rabbit_amqqueue_process:handle_ch_down for more info.
    %% If message is rejected by the master, the publish will be nacked
    %% even if mirrors confirm it. No need to check for length here.
    maybe_flow_ack(Sender, Flow),
    noreply(maybe_enqueue_message(Delivery, State));

%% The syncer has selected us for a sync; hand control of our backing
%% queue to rabbit_mirror_queue_sync:slave/7 until the sync ends.
handle_cast({sync_start, Ref, Syncer},
            State = #state { depth_delta         = DD,
                             backing_queue       = BQ,
                             backing_queue_state = BQS }) ->
    State1 = #state{rate_timer_ref = TRef} = ensure_rate_timer(State),
    %% S rebuilds our state from what the sync process hands back.
    S = fun({MA, TRefN, BQSN}) ->
                State1#state{depth_delta         = undefined,
                             msg_id_ack          = maps:from_list(MA),
                             rate_timer_ref      = TRefN,
                             backing_queue_state = BQSN}
        end,
    case rabbit_mirror_queue_sync:slave(
           DD, Ref, TRef, Syncer, BQ, BQS,
           %% Callback run during the sync to keep RAM-duration
           %% reporting alive while we are busy.
           fun (BQN, BQSN) ->
                   BQSN1 = update_ram_duration(BQN, BQSN),
                   TRefN = rabbit_misc:send_after(?RAM_DURATION_UPDATE_INTERVAL,
                                                  self(), update_ram_duration),
                   {TRefN, BQSN1}
           end) of
        denied              -> noreply(State1);
        {ok, Res}           -> noreply(set_delta(0, S(Res)));
        {failed, Res}       -> noreply(S(Res));
        {stop, Reason, Res} -> {stop, Reason, S(Res)}
    end;

handle_cast({set_maximum_since_use, Age}, State) ->
    ok = file_handle_cache:set_maximum_since_use(Age),
    noreply(State);

handle_cast({set_ram_duration_target, Duration},
            State = #state { backing_queue       = BQ,
                             backing_queue_state = BQS }) ->
    BQS1 = BQ:set_ram_duration_target(Duration, BQS),
    noreply(State #state { backing_queue_state = BQS1 });

handle_cast(policy_changed, State) ->
    %% During partial partitions, we might end up receiving messages expected by a master
    %% Ignore them
    noreply(State).
+
%% Periodic RAM-duration update driven by the rate timer.
handle_info(update_ram_duration, State = #state{backing_queue       = BQ,
                                                backing_queue_state = BQS}) ->
    BQS1 = update_ram_duration(BQ, BQS),
    %% Don't call noreply/1, we don't want to set timers
    {State1, Timeout} = next_state(State #state {
                                     rate_timer_ref      = undefined,
                                     backing_queue_state = BQS1 }),
    {noreply, State1, Timeout};

handle_info(sync_timeout, State) ->
    noreply(backing_queue_timeout(
              State #state { sync_timer_ref = undefined }));

handle_info(timeout, State) ->
    noreply(backing_queue_timeout(State));

%% A monitored channel (sender) went down.
handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) ->
    local_sender_death(ChPid, State),
    noreply(maybe_forget_sender(ChPid, down_from_ch, State));

%% We trap exits (see handle_go/1); a linked process dying stops us.
handle_info({'EXIT', _Pid, Reason}, State) ->
    {stop, Reason, State};

handle_info({bump_credit, Msg}, State) ->
    credit_flow:handle_bump_msg(Msg),
    noreply(State);

handle_info(bump_reduce_memory_use, State) ->
    noreply(State);

%% In the event of a short partition during sync we can detect the
%% master's 'death', drop out of sync, and then receive sync messages
%% which were still in flight. Ignore them.
handle_info({sync_msg, _Ref, _Msg, _Props, _Unacked}, State) ->
    noreply(State);

handle_info({sync_complete, _Ref}, State) ->
    noreply(State);

%% Anything else is a protocol violation: crash loudly.
handle_info(Msg, State) ->
    {stop, {unexpected_info, Msg}, State}.
+
%% Never started: nothing to clean up.
terminate(_Reason, {not_started, _Q}) ->
    ok;
terminate(_Reason, #state { backing_queue_state = undefined }) ->
    %% We've received a delete_and_terminate from gm, thus nothing to
    %% do here.
    ok;
terminate({shutdown, dropped} = R, State = #state{backing_queue       = BQ,
                                                  backing_queue_state = BQS}) ->
    %% See rabbit_mirror_queue_master:terminate/2
    terminate_common(State),
    BQ:delete_and_terminate(R, BQS);
terminate(shutdown, State) ->
    terminate_shutdown(shutdown, State);
terminate({shutdown, _} = R, State) ->
    terminate_shutdown(R, State);
%% Any other reason means the queue itself is going away (or we crashed):
%% delete the backing queue contents.
terminate(Reason, State = #state{backing_queue       = BQ,
                                 backing_queue_state = BQS}) ->
    terminate_common(State),
    BQ:delete_and_terminate(Reason, BQS).

%% If the Reason is shutdown, or {shutdown, _}, it is not the queue
%% being deleted: it's just the node going down. Even though we're a
%% mirror, we have no idea whether or not we'll be the only copy coming
%% back up. Thus we must assume we will be, and preserve anything we
%% have on disk.
terminate_shutdown(Reason, State = #state{backing_queue       = BQ,
                                          backing_queue_state = BQS}) ->
    terminate_common(State),
    BQ:terminate(Reason, BQS).

%% Shared teardown: deregister from the memory monitor and stop both timers.
terminate_common(State) ->
    ok = rabbit_memory_monitor:deregister(self()),
    stop_rate_timer(stop_sync_timer(State)).
+
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

handle_pre_hibernate({not_started, _Q} = State) ->
    {hibernate, State};

%% Before hibernating, report our RAM duration one last time, apply the
%% monitor's target, let the BQ prepare itself, and stop the rate timer.
handle_pre_hibernate(State = #state { backing_queue       = BQ,
                                      backing_queue_state = BQS }) ->
    {RamDuration, BQS1} = BQ:ram_duration(BQS),
    DesiredDuration =
        rabbit_memory_monitor:report_ram_duration(self(), RamDuration),
    BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1),
    BQS3 = BQ:handle_pre_hibernate(BQS2),
    {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS3 })}.
+
%% gen_server2 call priorities: info queries jump the queue, GM death
%% notifications come next, everything else runs at default priority.
prioritise_call(info, _From, _Len, _State)               -> 9;
prioritise_call({gm_deaths, _Dead}, _From, _Len, _State) -> 5;
prioritise_call(_Msg, _From, _Len, _State)               -> 0.
+
%% gen_server2 cast priorities: memory/file-handle housekeeping first,
%% backing-queue invocations next, then GM instructions; the rest is
%% default priority.
prioritise_cast({set_ram_duration_target, _Duration}, _Len, _State) -> 8;
prioritise_cast({set_maximum_since_use, _Age}, _Len, _State)        -> 8;
prioritise_cast({run_backing_queue, _Mod, _Fun}, _Len, _State)      -> 6;
prioritise_cast({gm, _Msg}, _Len, _State)                           -> 5;
prioritise_cast(_Msg, _Len, _State)                                 -> 0.
+
%% gen_server2 info priorities: RAM duration updates beat sync timeouts;
%% everything else runs at default priority.
prioritise_info(update_ram_duration, _Len, _State) -> 8;
prioritise_info(sync_timeout, _Len, _State)        -> 6;
prioritise_info(_Msg, _Len, _State)                -> 0.
+
%% gen_server2 callback: delegate message-queue formatting to rabbit_misc.
format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).
+
+%% ---------------------------------------------------------------------------
+%% GM
+%% ---------------------------------------------------------------------------
+
%% gm callback: tell the mirror process it is in the group.  The mirror
%% blocks in handle_go/1 waiting for exactly this {joined, GM} message.
joined([SPid], _Members) -> SPid ! {joined, self()}, ok.

members_changed([_SPid], _Births, []) ->
    ok;
members_changed([SPid], _Births, Deaths) ->
    %% Forward deaths to the mirror process.  If it answers
    %% {promote, CPid} we (the gm member) switch callback module and
    %% become the coordinator's gm.  with_exit_handler absorbs the case
    %% where the mirror process has already exited.
    case rabbit_misc:with_exit_handler(
           rabbit_misc:const(ok),
           fun() ->
                   gen_server2:call(SPid, {gm_deaths, Deaths}, infinity)
           end) of
        ok              -> ok;
        {promote, CPid} -> {become, rabbit_mirror_queue_coordinator, [CPid]}
    end.

handle_msg([_SPid], _From, hibernate_heartbeat) ->
    %% See rabbit_mirror_queue_coordinator:handle_pre_hibernate/1
    ok;
handle_msg([_SPid], _From, request_depth) ->
    %% This is only of value to the master
    ok;
handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) ->
    %% This is only of value to the master
    ok;
handle_msg([_SPid], _From, process_death) ->
    %% We must not take any notice of the master death here since it
    %% comes without ordering guarantees - there could still be
    %% messages from the master we have yet to receive. When we get
    %% members_changed, then there will be no more messages.
    ok;
handle_msg([CPid], _From, {delete_and_terminate, _Reason} = Msg) ->
    %% Forward the final instruction, then leave the ring.
    ok = gen_server2:cast(CPid, {gm, Msg}),
    {stop, {shutdown, ring_shutdown}};
handle_msg([SPid], _From, {sync_start, Ref, Syncer, SPids}) ->
    %% Only react if we are one of the mirrors selected for this sync.
    case lists:member(SPid, SPids) of
        true  -> gen_server2:cast(SPid, {sync_start, Ref, Syncer});
        false -> ok
    end;
handle_msg([SPid], _From, Msg) ->
    %% Everything else is a queue instruction: forward to the mirror.
    ok = gen_server2:cast(SPid, {gm, Msg}).

handle_terminate([_SPid], _Reason) ->
    ok.
+
+%% ---------------------------------------------------------------------------
+%% Others
+%% ---------------------------------------------------------------------------
+
%% Collect the requested info items via i/2.
infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items].

%% Single info item; unknown keys yield the empty atom ''.
i(pid, _State) ->
    self();
i(name, #state{q = Q}) when ?is_amqqueue(Q) ->
    amqqueue:get_name(Q);
i(master_pid, #state{q = Q}) when ?is_amqqueue(Q) ->
    amqqueue:get_pid(Q);
i(is_synchronised, #state{depth_delta = DD}) ->
    %% Synchronised iff our depth matches the master's exactly.
    DD =:= 0;
i(_, _) ->
    ''.

%% Initialise the backing queue, routing its async callbacks back
%% through rabbit_amqqueue:run_backing_queue/3 to this process.
bq_init(BQ, Q, Recover) ->
    Self = self(),
    BQ:init(Q, Recover,
            fun (Mod, Fun) ->
                    rabbit_amqqueue:run_backing_queue(Self, Mod, Fun)
            end).
+
%% Run a deferred backing-queue fun.  Funs addressed to the master
%% module are applied to ourselves instead (we may have been a master
%% when the fun was scheduled).
run_backing_queue(rabbit_mirror_queue_master, Fun, State) ->
    %% Yes, this might look a little crazy, but see comments in
    %% confirm_sender_death/1
    Fun(?MODULE, State);
run_backing_queue(Mod, Fun, State = #state { backing_queue       = BQ,
                                             backing_queue_state = BQS }) ->
    State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }.

%% This feature was used by `rabbit_amqqueue_process` and
%% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x. It is
%% unused in 3.8.x and thus deprecated. We keep it to support in-place
%% upgrades to 3.8.x (i.e. mixed-version clusters), but it is a no-op
%% starting with that version.
send_mandatory(#delivery{mandatory = false}) ->
    ok;
send_mandatory(#delivery{mandatory  = true,
                         sender     = SenderPid,
                         msg_seq_no = MsgSeqNo}) ->
    gen_server2:cast(SenderPid, {mandatory_received, MsgSeqNo}).
+
%% Publisher-confirm handling for a delivery.  A persistent message
%% published to a durable queue has its confirm recorded in MS (keyed by
%% MsgId) rather than sent now — presumably so it is only confirmed once
%% persisted; every other confirm=true case is confirmed immediately.
send_or_record_confirm(_, #delivery{ confirm = false }, MS, _State) ->
    MS;
send_or_record_confirm(published, #delivery { sender     = ChPid,
                                              confirm    = true,
                                              msg_seq_no = MsgSeqNo,
                                              message    = #basic_message {
                                                id            = MsgId,
                                                is_persistent = true } },
                       MS, #state{q = Q}) when ?amqqueue_is_durable(Q) ->
    maps:put(MsgId, {published, ChPid, MsgSeqNo} , MS);
send_or_record_confirm(_Status, #delivery { sender     = ChPid,
                                            confirm    = true,
                                            msg_seq_no = MsgSeqNo },
                       MS, #state{q = Q} = _State) ->
    ok = rabbit_classic_queue:confirm_to_sender(ChPid,
                                                amqqueue:get_name(Q), [MsgSeqNo]),
    MS.
+
%% The backing queue reports MsgIds as confirmed (persisted).  Reconcile
%% each against msg_id_status: messages seen from both GM and channel
%% are confirmed to their channel now; GM-only ones are marked confirmed
%% for later.  Confirms are batched per channel via a gb_tree.
confirm_messages(MsgIds, State = #state{q = Q, msg_id_status = MS}) ->
    QName = amqqueue:get_name(Q),
    {CMs, MS1} =
        lists:foldl(
          fun (MsgId, {CMsN, MSN} = Acc) ->
                  %% We will never see 'discarded' here
                  case maps:find(MsgId, MSN) of
                      error ->
                          %% If it needed confirming, it'll have
                          %% already been done.
                          Acc;
                      {ok, published} ->
                          %% Still not seen it from the channel, just
                          %% record that it's been confirmed.
                          {CMsN, maps:put(MsgId, confirmed, MSN)};
                      {ok, {published, ChPid, MsgSeqNo}} ->
                          %% Seen from both GM and Channel. Can now
                          %% confirm.
                          {rabbit_misc:gb_trees_cons(ChPid, MsgSeqNo, CMsN),
                           maps:remove(MsgId, MSN)};
                      {ok, confirmed} ->
                          %% It's already been confirmed. This is
                          %% probably it's been both sync'd to disk
                          %% and then delivered and ack'd before we've
                          %% seen the publish from the
                          %% channel. Nothing to do here.
                          Acc
                  end
          end, {gb_trees:empty(), MS}, MsgIds),
    Fun = fun (Pid, MsgSeqNos) ->
                  rabbit_classic_queue:confirm_to_sender(Pid, QName, MsgSeqNos)
          end,
    rabbit_misc:gb_trees_foreach(Fun, CMs),
    State #state { msg_id_status = MS1 }.

%% Translate process_instruction/2 results into gen_server2 returns.
handle_process_result({ok,   State}) -> noreply(State);
handle_process_result({stop, State}) -> {stop, normal, State}.
+
-spec promote_me({pid(), term()}, #state{}) -> no_return().

%% Turn this mirror into the master: start a coordinator around our gm,
%% convert our state into rabbit_amqqueue_process /
%% rabbit_mirror_queue_master state, and hand over (the caller then
%% issues {become, rabbit_amqqueue_process, ...}).
promote_me(From, #state { q                   = Q0,
                          gm                  = GM,
                          backing_queue       = BQ,
                          backing_queue_state = BQS,
                          rate_timer_ref      = RateTRef,
                          sender_queues       = SQ,
                          msg_id_ack          = MA,
                          msg_id_status       = MS,
                          known_senders       = KS}) when ?is_amqqueue(Q0) ->
    QName = amqqueue:get_name(Q0),
    rabbit_mirror_queue_misc:log_info(QName, "Promoting mirror ~s to master~n",
                                      [rabbit_misc:pid_to_string(self())]),
    Q1 = amqqueue:set_pid(Q0, self()),
    DeathFun = rabbit_mirror_queue_master:sender_death_fun(),
    DepthFun = rabbit_mirror_queue_master:depth_fun(),
    {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q1, GM, DeathFun, DepthFun),
    true = unlink(GM),
    gen_server2:reply(From, {promote, CPid}),

    %% Everything that we're monitoring, we need to ensure our new
    %% coordinator is monitoring.
    MPids = pmon:monitored(KS),
    ok = rabbit_mirror_queue_coordinator:ensure_monitoring(CPid, MPids),

    %% We find all the messages that we've received from channels but
    %% not from gm, and pass them to the
    %% queue_process:init_with_backing_queue_state to be enqueued.
    %%
    %% We also have to requeue messages which are pending acks: the
    %% consumers from the master queue have been lost and so these
    %% messages need requeuing. They might also be pending
    %% confirmation, and indeed they might also be pending arrival of
    %% the publication from the channel itself, if we received both
    %% the publication and the fetch via gm first! Requeuing doesn't
    %% affect confirmations: if the message was previously pending a
    %% confirmation then it still will be, under the same msg_id. So
    %% as a master, we need to be prepared to filter out the
    %% publication of said messages from the channel (is_duplicate
    %% (thus such requeued messages must remain in the msg_id_status
    %% (MS) which becomes seen_status (SS) in the master)).
    %%
    %% Then there are messages we already have in the queue, which are
    %% not currently pending acknowledgement:
    %% 1. Messages we've only received via gm:
    %%    Filter out subsequent publication from channel through
    %%    validate_message. Might have to issue confirms then or
    %%    later, thus queue_process state will have to know that
    %%    there's a pending confirm.
    %% 2. Messages received via both gm and channel:
    %%    Queue will have to deal with issuing confirms if necessary.
    %%
    %% MS contains the following three entry types:
    %%
    %% a) published:
    %%   published via gm only; pending arrival of publication from
    %%   channel, maybe pending confirm.
    %%
    %% b) {published, ChPid, MsgSeqNo}:
    %%   published via gm and channel; pending confirm.
    %%
    %% c) confirmed:
    %%   published via gm only, and confirmed; pending publication
    %%   from channel.
    %%
    %% d) discarded:
    %%   seen via gm only as discarded. Pending publication from
    %%   channel
    %%
    %% The forms a, c and d only, need to go to the master state
    %% seen_status (SS).
    %%
    %% The form b only, needs to go through to the queue_process
    %% state to form the msg_id_to_channel mapping (MTC).
    %%
    %% No messages that are enqueued from SQ at this point will have
    %% entries in MS.
    %%
    %% Messages that are extracted from MA may have entries in MS, and
    %% those messages are then requeued. However, as discussed above,
    %% this does not affect MS, nor which bits go through to SS in
    %% Master, or MTC in queue_process.

    St = [published, confirmed, discarded],
    SS = maps:filter(fun (_MsgId, Status) -> lists:member(Status, St) end, MS),
    AckTags = [AckTag || {_MsgId, AckTag} <- maps:to_list(MA)],

    MasterState = rabbit_mirror_queue_master:promote_backing_queue_state(
                    QName, CPid, BQ, BQS, GM, AckTags, SS, MPids),

    MTC = maps:fold(fun (MsgId, {published, ChPid, MsgSeqNo}, MTC0) ->
                            maps:put(MsgId, {ChPid, MsgSeqNo}, MTC0);
                        (_Msgid, _Status, MTC0) ->
                            MTC0
                    end, #{}, MS),
    Deliveries = [promote_delivery(Delivery) ||
                   {_ChPid, {PubQ, _PendCh, _ChState}} <- maps:to_list(SQ),
                   Delivery <- queue:to_list(PubQ)],
    %% Channels whose GM-side death we were still waiting on need not be
    %% monitored by the new master.
    AwaitGmDown = [ChPid || {ChPid, {_, _, down_from_ch}} <- maps:to_list(SQ)],
    KS1 = lists:foldl(fun (ChPid0, KS0) ->
                              pmon:demonitor(ChPid0, KS0)
                      end, KS, AwaitGmDown),
    rabbit_misc:store_proc_name(rabbit_amqqueue_process, QName),
    rabbit_amqqueue_process:init_with_backing_queue_state(
      Q1, rabbit_mirror_queue_master, MasterState, RateTRef, Deliveries, KS1,
      MTC).

%% We reset mandatory to false here because we will have sent the
%% mandatory_received already as soon as we got the message. We also
%% need to send an ack for these messages since the channel is waiting
%% for one for the via-GM case and we will not now receive one.
promote_delivery(Delivery = #delivery{sender = Sender, flow = Flow}) ->
    maybe_flow_ack(Sender, Flow),
    Delivery#delivery{mandatory = false}.
+
+noreply(State) ->
+ {NewState, Timeout} = next_state(State),
+ {noreply, ensure_rate_timer(NewState), Timeout}.
+
+reply(Reply, State) ->
+ {NewState, Timeout} = next_state(State),
+ {reply, Reply, ensure_rate_timer(NewState), Timeout}.
+
+next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) ->
+ {MsgIds, BQS1} = BQ:drain_confirmed(BQS),
+ State1 = confirm_messages(MsgIds,
+ State #state { backing_queue_state = BQS1 }),
+ case BQ:needs_timeout(BQS1) of
+ false -> {stop_sync_timer(State1), hibernate };
+ idle -> {stop_sync_timer(State1), ?SYNC_INTERVAL};
+ timed -> {ensure_sync_timer(State1), 0 }
+ end.
+
+backing_queue_timeout(State = #state { backing_queue = BQ,
+ backing_queue_state = BQS }) ->
+ State#state{backing_queue_state = BQ:timeout(BQS)}.
+
+ensure_sync_timer(State) ->
+ rabbit_misc:ensure_timer(State, #state.sync_timer_ref,
+ ?SYNC_INTERVAL, sync_timeout).
+
+stop_sync_timer(State) -> rabbit_misc:stop_timer(State, #state.sync_timer_ref).
+
+ensure_rate_timer(State) ->
+ rabbit_misc:ensure_timer(State, #state.rate_timer_ref,
+ ?RAM_DURATION_UPDATE_INTERVAL,
+ update_ram_duration).
+
+stop_rate_timer(State) -> rabbit_misc:stop_timer(State, #state.rate_timer_ref).
+
+ensure_monitoring(ChPid, State = #state { known_senders = KS }) ->
+ State #state { known_senders = pmon:monitor(ChPid, KS) }.
+
+local_sender_death(ChPid, #state { known_senders = KS }) ->
+ %% The channel will be monitored iff we have received a delivery
+ %% from it but not heard about its death from the master. So if it
+ %% is monitored we need to point the death out to the master (see
+ %% essay).
+ ok = case pmon:is_monitored(ChPid, KS) of
+ false -> ok;
+ true -> confirm_sender_death(ChPid)
+ end.
+
+confirm_sender_death(Pid) ->
+ %% We have to deal with the possibility that we'll be promoted to
+ %% master before this thing gets run. Consequently we set the
+ %% module to rabbit_mirror_queue_master so that if we do become a
+ %% rabbit_amqqueue_process before then, sane things will happen.
+ Fun =
+ fun (?MODULE, State = #state { known_senders = KS,
+ gm = GM }) ->
+ %% We're running still as a mirror
+ %%
+ %% See comment in local_sender_death/2; we might have
+ %% received a sender_death in the meanwhile so check
+ %% again.
+ ok = case pmon:is_monitored(Pid, KS) of
+ false -> ok;
+ true -> gm:broadcast(GM, {ensure_monitoring, [Pid]}),
+ confirm_sender_death(Pid)
+ end,
+ State;
+ (rabbit_mirror_queue_master, State) ->
+ %% We've become a master. State is now opaque to
+ %% us. When we became master, if Pid was still known
+ %% to us then we'd have set up monitoring of it then,
+ %% so this is now a noop.
+ State
+ end,
+ %% Note that we do not remove our knowledge of this ChPid until we
+ %% get the sender_death from GM as well as a DOWN notification.
+ {ok, _TRef} = timer:apply_after(
+ ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue,
+ [self(), rabbit_mirror_queue_master, Fun]),
+ ok.
+
+forget_sender(_, running) -> false;
+forget_sender(down_from_gm, down_from_gm) -> false; %% [1]
+forget_sender(down_from_ch, down_from_ch) -> false;
+forget_sender(Down1, Down2) when Down1 =/= Down2 -> true.
+
+%% [1] If another mirror goes through confirm_sender_death/1 before we
+%% do we can get two GM sender_death messages in a row for the same
+%% channel - don't treat that as anything special.
+
+%% Record and process lifetime events from channels. Forget all about a channel
+%% only when down notifications are received from both the channel and from gm.
+maybe_forget_sender(ChPid, ChState, State = #state { sender_queues = SQ,
+ msg_id_status = MS,
+ known_senders = KS }) ->
+ case maps:find(ChPid, SQ) of
+ error ->
+ State;
+ {ok, {MQ, PendCh, ChStateRecord}} ->
+ case forget_sender(ChState, ChStateRecord) of
+ true ->
+ credit_flow:peer_down(ChPid),
+ State #state { sender_queues = maps:remove(ChPid, SQ),
+ msg_id_status = lists:foldl(
+ fun maps:remove/2,
+ MS, sets:to_list(PendCh)),
+ known_senders = pmon:demonitor(ChPid, KS) };
+ false ->
+ SQ1 = maps:put(ChPid, {MQ, PendCh, ChState}, SQ),
+ State #state { sender_queues = SQ1 }
+ end
+ end.
+
+maybe_enqueue_message(
+ Delivery = #delivery { message = #basic_message { id = MsgId },
+ sender = ChPid },
+ State = #state { sender_queues = SQ, msg_id_status = MS }) ->
+ send_mandatory(Delivery), %% must do this before confirms
+ State1 = ensure_monitoring(ChPid, State),
+ %% We will never see {published, ChPid, MsgSeqNo} here.
+ case maps:find(MsgId, MS) of
+ error ->
+ {MQ, PendingCh, ChState} = get_sender_queue(ChPid, SQ),
+ MQ1 = queue:in(Delivery, MQ),
+ SQ1 = maps:put(ChPid, {MQ1, PendingCh, ChState}, SQ),
+ State1 #state { sender_queues = SQ1 };
+ {ok, Status} ->
+ MS1 = send_or_record_confirm(
+ Status, Delivery, maps:remove(MsgId, MS), State1),
+ SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
+ State1 #state { msg_id_status = MS1,
+ sender_queues = SQ1 }
+ end.
+
+get_sender_queue(ChPid, SQ) ->
+ case maps:find(ChPid, SQ) of
+ error -> {queue:new(), sets:new(), running};
+ {ok, Val} -> Val
+ end.
+
+remove_from_pending_ch(MsgId, ChPid, SQ) ->
+ case maps:find(ChPid, SQ) of
+ error ->
+ SQ;
+ {ok, {MQ, PendingCh, ChState}} ->
+ maps:put(ChPid, {MQ, sets:del_element(MsgId, PendingCh), ChState},
+ SQ)
+ end.
+
+publish_or_discard(Status, ChPid, MsgId,
+ State = #state { sender_queues = SQ, msg_id_status = MS }) ->
+ %% We really are going to do the publish/discard right now, even
+ %% though we may not have seen it directly from the channel. But
+ %% we cannot issue confirms until the latter has happened. So we
+ %% need to keep track of the MsgId and its confirmation status in
+ %% the meantime.
+ State1 = ensure_monitoring(ChPid, State),
+ {MQ, PendingCh, ChState} = get_sender_queue(ChPid, SQ),
+ {MQ1, PendingCh1, MS1} =
+ case queue:out(MQ) of
+ {empty, _MQ2} ->
+ {MQ, sets:add_element(MsgId, PendingCh),
+ maps:put(MsgId, Status, MS)};
+ {{value, Delivery = #delivery {
+ message = #basic_message { id = MsgId } }}, MQ2} ->
+ {MQ2, PendingCh,
+ %% We received the msg from the channel first. Thus
+ %% we need to deal with confirms here.
+ send_or_record_confirm(Status, Delivery, MS, State1)};
+ {{value, #delivery {}}, _MQ2} ->
+ %% The instruction was sent to us before we were
+ %% listed in the slave_pids within the #amqqueue{}
+ %% record. We'll never receive the message directly
+ %% from the channel. And the channel will not be
+ %% expecting any confirms from us.
+ {MQ, PendingCh, MS}
+ end,
+ SQ1 = maps:put(ChPid, {MQ1, PendingCh1, ChState}, SQ),
+ State1 #state { sender_queues = SQ1, msg_id_status = MS1 }.
+
+
+process_instruction({publish, ChPid, Flow, MsgProps,
+ Msg = #basic_message { id = MsgId }}, State) ->
+ maybe_flow_ack(ChPid, Flow),
+ State1 = #state { backing_queue = BQ, backing_queue_state = BQS } =
+ publish_or_discard(published, ChPid, MsgId, State),
+ BQS1 = BQ:publish(Msg, MsgProps, true, ChPid, Flow, BQS),
+ {ok, State1 #state { backing_queue_state = BQS1 }};
+process_instruction({batch_publish, ChPid, Flow, Publishes}, State) ->
+ maybe_flow_ack(ChPid, Flow),
+ State1 = #state { backing_queue = BQ, backing_queue_state = BQS } =
+ lists:foldl(fun ({#basic_message { id = MsgId },
+ _MsgProps, _IsDelivered}, St) ->
+ publish_or_discard(published, ChPid, MsgId, St)
+ end, State, Publishes),
+ BQS1 = BQ:batch_publish(Publishes, ChPid, Flow, BQS),
+ {ok, State1 #state { backing_queue_state = BQS1 }};
+process_instruction({publish_delivered, ChPid, Flow, MsgProps,
+ Msg = #basic_message { id = MsgId }}, State) ->
+ maybe_flow_ack(ChPid, Flow),
+ State1 = #state { backing_queue = BQ, backing_queue_state = BQS } =
+ publish_or_discard(published, ChPid, MsgId, State),
+ true = BQ:is_empty(BQS),
+ {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, Flow, BQS),
+ {ok, maybe_store_ack(true, MsgId, AckTag,
+ State1 #state { backing_queue_state = BQS1 })};
+process_instruction({batch_publish_delivered, ChPid, Flow, Publishes}, State) ->
+ maybe_flow_ack(ChPid, Flow),
+ {MsgIds,
+ State1 = #state { backing_queue = BQ, backing_queue_state = BQS }} =
+ lists:foldl(fun ({#basic_message { id = MsgId }, _MsgProps},
+ {MsgIds, St}) ->
+ {[MsgId | MsgIds],
+ publish_or_discard(published, ChPid, MsgId, St)}
+ end, {[], State}, Publishes),
+ true = BQ:is_empty(BQS),
+ {AckTags, BQS1} = BQ:batch_publish_delivered(Publishes, ChPid, Flow, BQS),
+ MsgIdsAndAcks = lists:zip(lists:reverse(MsgIds), AckTags),
+ State2 = lists:foldl(
+ fun ({MsgId, AckTag}, St) ->
+ maybe_store_ack(true, MsgId, AckTag, St)
+ end, State1 #state { backing_queue_state = BQS1 },
+ MsgIdsAndAcks),
+ {ok, State2};
+process_instruction({discard, ChPid, Flow, MsgId}, State) ->
+ maybe_flow_ack(ChPid, Flow),
+ State1 = #state { backing_queue = BQ, backing_queue_state = BQS } =
+ publish_or_discard(discarded, ChPid, MsgId, State),
+ BQS1 = BQ:discard(MsgId, ChPid, Flow, BQS),
+ {ok, State1 #state { backing_queue_state = BQS1 }};
+process_instruction({drop, Length, Dropped, AckRequired},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS }) ->
+ QLen = BQ:len(BQS),
+ ToDrop = case QLen - Length of
+ N when N > 0 -> N;
+ _ -> 0
+ end,
+ State1 = lists:foldl(
+ fun (const, StateN = #state{backing_queue_state = BQSN}) ->
+ {{MsgId, AckTag}, BQSN1} = BQ:drop(AckRequired, BQSN),
+ maybe_store_ack(
+ AckRequired, MsgId, AckTag,
+ StateN #state { backing_queue_state = BQSN1 })
+ end, State, lists:duplicate(ToDrop, const)),
+ {ok, case AckRequired of
+ true -> State1;
+ false -> update_delta(ToDrop - Dropped, State1)
+ end};
+process_instruction({ack, MsgIds},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS,
+ msg_id_ack = MA }) ->
+ {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA),
+ {MsgIds1, BQS1} = BQ:ack(AckTags, BQS),
+ [] = MsgIds1 -- MsgIds, %% ASSERTION
+ {ok, update_delta(length(MsgIds1) - length(MsgIds),
+ State #state { msg_id_ack = MA1,
+ backing_queue_state = BQS1 })};
+process_instruction({requeue, MsgIds},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS,
+ msg_id_ack = MA }) ->
+ {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA),
+ {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS),
+ {ok, State #state { msg_id_ack = MA1,
+ backing_queue_state = BQS1 }};
+process_instruction({sender_death, ChPid},
+ State = #state { known_senders = KS }) ->
+ %% The channel will be monitored iff we have received a message
+ %% from it. In this case we just want to avoid doing work if we
+ %% never got any messages.
+ {ok, case pmon:is_monitored(ChPid, KS) of
+ false -> State;
+ true -> maybe_forget_sender(ChPid, down_from_gm, State)
+ end};
+process_instruction({depth, Depth},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS }) ->
+ {ok, set_delta(Depth - BQ:depth(BQS), State)};
+
+process_instruction({delete_and_terminate, Reason},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS }) ->
+ BQ:delete_and_terminate(Reason, BQS),
+ {stop, State #state { backing_queue_state = undefined }};
+process_instruction({set_queue_mode, Mode},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS }) ->
+ BQS1 = BQ:set_queue_mode(Mode, BQS),
+ {ok, State #state { backing_queue_state = BQS1 }}.
+
+maybe_flow_ack(Sender, flow) -> credit_flow:ack(Sender);
+maybe_flow_ack(_Sender, noflow) -> ok.
+
+msg_ids_to_acktags(MsgIds, MA) ->
+ {AckTags, MA1} =
+ lists:foldl(
+ fun (MsgId, {Acc, MAN}) ->
+ case maps:find(MsgId, MA) of
+ error -> {Acc, MAN};
+ {ok, AckTag} -> {[AckTag | Acc], maps:remove(MsgId, MAN)}
+ end
+ end, {[], MA}, MsgIds),
+ {lists:reverse(AckTags), MA1}.
+
+maybe_store_ack(false, _MsgId, _AckTag, State) ->
+ State;
+maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA }) ->
+ State #state { msg_id_ack = maps:put(MsgId, AckTag, MA) }.
+
+set_delta(0, State = #state { depth_delta = undefined }) ->
+ ok = record_synchronised(State#state.q),
+ State #state { depth_delta = 0 };
+set_delta(NewDelta, State = #state { depth_delta = undefined }) ->
+ true = NewDelta > 0, %% assertion
+ State #state { depth_delta = NewDelta };
+set_delta(NewDelta, State = #state { depth_delta = Delta }) ->
+ update_delta(NewDelta - Delta, State).
+
+update_delta(_DeltaChange, State = #state { depth_delta = undefined }) ->
+ State;
+update_delta( DeltaChange, State = #state { depth_delta = 0 }) ->
+ 0 = DeltaChange, %% assertion: we cannot become unsync'ed
+ State;
+update_delta( DeltaChange, State = #state { depth_delta = Delta }) ->
+ true = DeltaChange =< 0, %% assertion: we cannot become 'less' sync'ed
+ set_delta(Delta + DeltaChange, State #state { depth_delta = undefined }).
+
+update_ram_duration(BQ, BQS) ->
+ {RamDuration, BQS1} = BQ:ram_duration(BQS),
+ DesiredDuration =
+ rabbit_memory_monitor:report_ram_duration(self(), RamDuration),
+ BQ:set_ram_duration_target(DesiredDuration, BQS1).
+
+record_synchronised(Q0) when ?is_amqqueue(Q0) ->
+ QName = amqqueue:get_name(Q0),
+ Self = self(),
+ F = fun () ->
+ case mnesia:read({rabbit_queue, QName}) of
+ [] ->
+ ok;
+ [Q1] when ?is_amqqueue(Q1) ->
+ SSPids = amqqueue:get_sync_slave_pids(Q1),
+ SSPids1 = [Self | SSPids],
+ Q2 = amqqueue:set_sync_slave_pids(Q1, SSPids1),
+ rabbit_mirror_queue_misc:store_updated_slaves(Q2),
+ {ok, Q2}
+ end
+ end,
+ case rabbit_misc:execute_mnesia_transaction(F) of
+ ok -> ok;
+ {ok, Q2} -> rabbit_mirror_queue_misc:maybe_drop_master_after_sync(Q2)
+ end.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_sync.erl b/deps/rabbit/src/rabbit_mirror_queue_sync.erl
new file mode 100644
index 0000000000..a82ee05599
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_sync.erl
@@ -0,0 +1,420 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_sync).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([master_prepare/4, master_go/8, slave/7, conserve_resources/3]).
+
+-define(SYNC_PROGRESS_INTERVAL, 1000000).
+
+%% There are three processes around, the master, the syncer and the
+%% slave(s). The syncer is an intermediary, linked to the master in
+%% order to make sure we do not mess with the master's credit flow or
+%% set of monitors.
+%%
+%% Interactions
+%% ------------
+%%
+%% '*' indicates repeating messages. All are standard Erlang messages
+%% except sync_start which is sent over GM to flush out any other
+%% messages that we might have sent that way already. (credit) is the
+%% usual credit_flow bump message every so often.
+%%
+%% Master Syncer Slave(s)
+%% sync_mirrors -> || ||
+%% || -- (spawns) --> || ||
+%% || --------- sync_start (over GM) -------> ||
+%% || || <--- sync_ready ---- ||
+%% || || (or) ||
+%% || || <--- sync_deny ----- ||
+%% || <--- ready ---- || ||
+%% || <--- next* ---- || || }
+%% || ---- msg* ----> || || } loop
+%% || || ---- sync_msgs* ---> || }
+%% || || <--- (credit)* ----- || }
+%% || <--- next ---- || ||
+%% || ---- done ----> || ||
+%% || || -- sync_complete --> ||
+%% || (Dies) ||
+
+-type log_fun() :: fun ((string(), [any()]) -> 'ok').
+-type bq() :: atom().
+-type bqs() :: any().
+-type ack() :: any().
+-type slave_sync_state() :: {[{rabbit_types:msg_id(), ack()}], timer:tref(),
+ bqs()}.
+
+%% ---------------------------------------------------------------------------
+%% Master
+
+-spec master_prepare(reference(), rabbit_amqqueue:name(),
+ log_fun(), [pid()]) -> pid().
+
+master_prepare(Ref, QName, Log, SPids) ->
+ MPid = self(),
+ spawn_link(fun () ->
+ ?store_proc_name(QName),
+ syncer(Ref, Log, MPid, SPids)
+ end).
+
+-spec master_go(pid(), reference(), log_fun(),
+ rabbit_mirror_queue_master:stats_fun(),
+ rabbit_mirror_queue_master:stats_fun(),
+ non_neg_integer(),
+ bq(), bqs()) ->
+ {'already_synced', bqs()} | {'ok', bqs()} |
+ {'cancelled', bqs()} |
+ {'shutdown', any(), bqs()} |
+ {'sync_died', any(), bqs()}.
+
+master_go(Syncer, Ref, Log, HandleInfo, EmitStats, SyncBatchSize, BQ, BQS) ->
+ Args = {Syncer, Ref, Log, HandleInfo, EmitStats, rabbit_misc:get_parent()},
+ receive
+ {'EXIT', Syncer, normal} -> {already_synced, BQS};
+ {'EXIT', Syncer, Reason} -> {sync_died, Reason, BQS};
+ {ready, Syncer} -> EmitStats({syncing, 0}),
+ master_batch_go0(Args, SyncBatchSize,
+ BQ, BQS)
+ end.
+
+master_batch_go0(Args, BatchSize, BQ, BQS) ->
+ FoldFun =
+ fun (Msg, MsgProps, Unacked, Acc) ->
+ Acc1 = append_to_acc(Msg, MsgProps, Unacked, Acc),
+ case maybe_master_batch_send(Acc1, BatchSize) of
+ true -> master_batch_send(Args, Acc1);
+ false -> {cont, Acc1}
+ end
+ end,
+ FoldAcc = {[], 0, {0, BQ:depth(BQS)}, erlang:monotonic_time()},
+ bq_fold(FoldFun, FoldAcc, Args, BQ, BQS).
+
+master_batch_send({Syncer, Ref, Log, HandleInfo, EmitStats, Parent},
+ {Batch, I, {Curr, Len}, Last}) ->
+ T = maybe_emit_stats(Last, I, EmitStats, Log),
+ HandleInfo({syncing, I}),
+ handle_set_maximum_since_use(),
+ SyncMsg = {msgs, Ref, lists:reverse(Batch)},
+ NewAcc = {[], I + length(Batch), {Curr, Len}, T},
+ master_send_receive(SyncMsg, NewAcc, Syncer, Ref, Parent).
+
+%% Either send messages when we reach the last one in the queue or
+%% whenever we have accumulated BatchSize messages.
+maybe_master_batch_send({_, _, {Len, Len}, _}, _BatchSize) ->
+ true;
+maybe_master_batch_send({_, _, {Curr, _Len}, _}, BatchSize)
+ when Curr rem BatchSize =:= 0 ->
+ true;
+maybe_master_batch_send(_Acc, _BatchSize) ->
+ false.
+
+bq_fold(FoldFun, FoldAcc, Args, BQ, BQS) ->
+ case BQ:fold(FoldFun, FoldAcc, BQS) of
+ {{shutdown, Reason}, BQS1} -> {shutdown, Reason, BQS1};
+ {{sync_died, Reason}, BQS1} -> {sync_died, Reason, BQS1};
+ {_, BQS1} -> master_done(Args, BQS1)
+ end.
+
+append_to_acc(Msg, MsgProps, Unacked, {Batch, I, {Curr, Len}, T}) ->
+ {[{Msg, MsgProps, Unacked} | Batch], I, {Curr + 1, Len}, T}.
+
+master_send_receive(SyncMsg, NewAcc, Syncer, Ref, Parent) ->
+ receive
+ {'$gen_call', From,
+ cancel_sync_mirrors} -> stop_syncer(Syncer, {cancel, Ref}),
+ gen_server2:reply(From, ok),
+ {stop, cancelled};
+ {next, Ref} -> Syncer ! SyncMsg,
+ {cont, NewAcc};
+ {'EXIT', Parent, Reason} -> {stop, {shutdown, Reason}};
+ {'EXIT', Syncer, Reason} -> {stop, {sync_died, Reason}}
+ end.
+
+master_done({Syncer, Ref, _Log, _HandleInfo, _EmitStats, Parent}, BQS) ->
+ receive
+ {'$gen_call', From,
+ cancel_sync_mirrors} ->
+ stop_syncer(Syncer, {cancel, Ref}),
+ gen_server2:reply(From, ok),
+ {cancelled, BQS};
+ {cancelled, Ref} ->
+ {cancelled, BQS};
+ {next, Ref} ->
+ stop_syncer(Syncer, {done, Ref}),
+ {ok, BQS};
+ {'EXIT', Parent, Reason} ->
+ {shutdown, Reason, BQS};
+ {'EXIT', Syncer, Reason} ->
+ {sync_died, Reason, BQS}
+ end.
+
+stop_syncer(Syncer, Msg) ->
+ unlink(Syncer),
+ Syncer ! Msg,
+ receive {'EXIT', Syncer, _} -> ok
+ after 0 -> ok
+ end.
+
+maybe_emit_stats(Last, I, EmitStats, Log) ->
+ Interval = erlang:convert_time_unit(
+ erlang:monotonic_time() - Last, native, micro_seconds),
+ case Interval > ?SYNC_PROGRESS_INTERVAL of
+ true -> EmitStats({syncing, I}),
+ Log("~p messages", [I]),
+ erlang:monotonic_time();
+ false -> Last
+ end.
+
+handle_set_maximum_since_use() ->
+ receive
+ {'$gen_cast', {set_maximum_since_use, Age}} ->
+ ok = file_handle_cache:set_maximum_since_use(Age)
+ after 0 ->
+ ok
+ end.
+
+%% Master
+%% ---------------------------------------------------------------------------
+%% Syncer
+
+syncer(Ref, Log, MPid, SPids) ->
+ [erlang:monitor(process, SPid) || SPid <- SPids],
+ %% We wait for a reply from the mirrors so that we know they are in
+ %% a receive block and will thus receive messages we send to them
+ %% *without* those messages ending up in their gen_server2 pqueue.
+ case await_slaves(Ref, SPids) of
+ [] -> Log("all mirrors already synced", []);
+ SPids1 -> MPid ! {ready, self()},
+ Log("mirrors ~p to sync", [[node(SPid) || SPid <- SPids1]]),
+ syncer_check_resources(Ref, MPid, SPids1)
+ end.
+
+await_slaves(Ref, SPids) ->
+ [SPid || SPid <- SPids,
+ rabbit_mnesia:on_running_node(SPid) andalso %% [0]
+ receive
+ {sync_ready, Ref, SPid} -> true;
+ {sync_deny, Ref, SPid} -> false;
+ {'DOWN', _, process, SPid, _} -> false
+ end].
+%% [0] This check is in case there's been a partition which has then
+%% healed in between the master retrieving the mirror pids from Mnesia
+%% and sending 'sync_start' over GM. If so there might be mirrors on the
+%% other side of the partition which we can monitor (since they have
+%% rejoined the distributed system with us) but which did not get the
+%% 'sync_start' and so will not reply. We need to act as though they are
+%% down.
+
+syncer_check_resources(Ref, MPid, SPids) ->
+ rabbit_alarm:register(self(), {?MODULE, conserve_resources, []}),
+ %% Before we ask the master node to send the first batch of messages
+ %% over here, we check if one node is already short on memory. If
+ %% that's the case, we wait for the alarm to be cleared before
+ %% starting the syncer loop.
+ AlarmedNodes = lists:any(
+ fun
+ ({{resource_limit, memory, _}, _}) -> true;
+ ({_, _}) -> false
+ end, rabbit_alarm:get_alarms()),
+ if
+ not AlarmedNodes ->
+ MPid ! {next, Ref},
+ syncer_loop(Ref, MPid, SPids);
+ true ->
+ case wait_for_resources(Ref, SPids) of
+ cancel -> MPid ! {cancelled, Ref};
+ SPids1 -> MPid ! {next, Ref},
+ syncer_loop(Ref, MPid, SPids1)
+ end
+ end.
+
+syncer_loop(Ref, MPid, SPids) ->
+ receive
+ {conserve_resources, memory, true} ->
+ case wait_for_resources(Ref, SPids) of
+ cancel -> MPid ! {cancelled, Ref};
+ SPids1 -> syncer_loop(Ref, MPid, SPids1)
+ end;
+ {conserve_resources, _, _} ->
+ %% Ignore other alerts.
+ syncer_loop(Ref, MPid, SPids);
+ {msgs, Ref, Msgs} ->
+ SPids1 = wait_for_credit(SPids),
+ case SPids1 of
+ [] ->
+ % Die silently because there are no mirrors left.
+ ok;
+ _ ->
+ broadcast(SPids1, {sync_msgs, Ref, Msgs}),
+ MPid ! {next, Ref},
+ syncer_loop(Ref, MPid, SPids1)
+ end;
+ {cancel, Ref} ->
+ %% We don't tell the mirrors we will die - so when we do
+ %% they interpret that as a failure, which is what we
+ %% want.
+ ok;
+ {done, Ref} ->
+ [SPid ! {sync_complete, Ref} || SPid <- SPids]
+ end.
+
+broadcast(SPids, Msg) ->
+ [begin
+ credit_flow:send(SPid),
+ SPid ! Msg
+ end || SPid <- SPids].
+
+conserve_resources(Pid, Source, {_, Conserve, _}) ->
+ Pid ! {conserve_resources, Source, Conserve},
+ ok.
+
+wait_for_credit(SPids) ->
+ case credit_flow:blocked() of
+ true -> receive
+ {bump_credit, Msg} ->
+ credit_flow:handle_bump_msg(Msg),
+ wait_for_credit(SPids);
+ {'DOWN', _, process, SPid, _} ->
+ credit_flow:peer_down(SPid),
+ wait_for_credit(lists:delete(SPid, SPids))
+ end;
+ false -> SPids
+ end.
+
+wait_for_resources(Ref, SPids) ->
+ receive
+ {conserve_resources, memory, false} ->
+ SPids;
+ {conserve_resources, _, _} ->
+ %% Ignore other alerts.
+ wait_for_resources(Ref, SPids);
+ {cancel, Ref} ->
+ %% We don't tell the mirrors we will die - so when we do
+ %% they interpret that as a failure, which is what we
+ %% want.
+ cancel;
+ {'DOWN', _, process, SPid, _} ->
+ credit_flow:peer_down(SPid),
+ SPids1 = wait_for_credit(lists:delete(SPid, SPids)),
+ wait_for_resources(Ref, SPids1)
+ end.
+
+%% Syncer
+%% ---------------------------------------------------------------------------
+%% Slave
+
+-spec slave(non_neg_integer(), reference(), timer:tref(), pid(),
+ bq(), bqs(), fun((bq(), bqs()) -> {timer:tref(), bqs()})) ->
+ 'denied' |
+ {'ok' | 'failed', slave_sync_state()} |
+ {'stop', any(), slave_sync_state()}.
+
+slave(0, Ref, _TRef, Syncer, _BQ, _BQS, _UpdateRamDuration) ->
+ Syncer ! {sync_deny, Ref, self()},
+ denied;
+
+slave(_DD, Ref, TRef, Syncer, BQ, BQS, UpdateRamDuration) ->
+ MRef = erlang:monitor(process, Syncer),
+ Syncer ! {sync_ready, Ref, self()},
+ {_MsgCount, BQS1} = BQ:purge(BQ:purge_acks(BQS)),
+ slave_sync_loop({Ref, MRef, Syncer, BQ, UpdateRamDuration,
+ rabbit_misc:get_parent()}, {[], TRef, BQS1}).
+
+slave_sync_loop(Args = {Ref, MRef, Syncer, BQ, UpdateRamDuration, Parent},
+ State = {MA, TRef, BQS}) ->
+ receive
+ {'DOWN', MRef, process, Syncer, _Reason} ->
+ %% If the master dies half way we are not in the usual
+ %% half-synced state (with messages nearer the tail of the
+ %% queue); instead we have ones nearer the head. If we then
+ %% sync with a newly promoted master, or even just receive
+ %% messages from it, we have a hole in the middle. So the
+ %% only thing to do here is purge.
+ {_MsgCount, BQS1} = BQ:purge(BQ:purge_acks(BQS)),
+ credit_flow:peer_down(Syncer),
+ {failed, {[], TRef, BQS1}};
+ {bump_credit, Msg} ->
+ credit_flow:handle_bump_msg(Msg),
+ slave_sync_loop(Args, State);
+ {sync_complete, Ref} ->
+ erlang:demonitor(MRef, [flush]),
+ credit_flow:peer_down(Syncer),
+ {ok, State};
+ {'$gen_cast', {set_maximum_since_use, Age}} ->
+ ok = file_handle_cache:set_maximum_since_use(Age),
+ slave_sync_loop(Args, State);
+ {'$gen_cast', {set_ram_duration_target, Duration}} ->
+ BQS1 = BQ:set_ram_duration_target(Duration, BQS),
+ slave_sync_loop(Args, {MA, TRef, BQS1});
+ {'$gen_cast', {run_backing_queue, Mod, Fun}} ->
+ BQS1 = BQ:invoke(Mod, Fun, BQS),
+ slave_sync_loop(Args, {MA, TRef, BQS1});
+ update_ram_duration ->
+ {TRef1, BQS1} = UpdateRamDuration(BQ, BQS),
+ slave_sync_loop(Args, {MA, TRef1, BQS1});
+ {sync_msgs, Ref, Batch} ->
+ credit_flow:ack(Syncer),
+ {MA1, BQS1} = process_batch(Batch, MA, BQ, BQS),
+ slave_sync_loop(Args, {MA1, TRef, BQS1});
+ {'EXIT', Parent, Reason} ->
+ {stop, Reason, State};
+ %% If the master throws an exception
+ {'$gen_cast', {gm, {delete_and_terminate, Reason}}} ->
+ BQ:delete_and_terminate(Reason, BQS),
+ {stop, Reason, {[], TRef, undefined}}
+ end.
+
+%% We are partitioning messages by the Unacked element in the tuple.
+%% when unacked = true, then it's a publish_delivered message,
+%% otherwise it's a publish message.
+%%
+%% Note that we can't first partition the batch and then publish each
+%% part, since that would result in re-ordering messages, which we
+%% don't want to do.
+process_batch([], MA, _BQ, BQS) ->
+ {MA, BQS};
+process_batch(Batch, MA, BQ, BQS) ->
+ {_Msg, _MsgProps, Unacked} = hd(Batch),
+ process_batch(Batch, Unacked, [], MA, BQ, BQS).
+
+process_batch([{Msg, Props, true = Unacked} | Rest], true = Unacked,
+ Acc, MA, BQ, BQS) ->
+ %% publish_delivered messages don't need the IsDelivered flag,
+ %% therefore we just add {Msg, Props} to the accumulator.
+ process_batch(Rest, Unacked, [{Msg, props(Props)} | Acc],
+ MA, BQ, BQS);
+process_batch([{Msg, Props, false = Unacked} | Rest], false = Unacked,
+ Acc, MA, BQ, BQS) ->
+ %% publish messages needs the IsDelivered flag which is set to true
+ %% here.
+ process_batch(Rest, Unacked, [{Msg, props(Props), true} | Acc],
+ MA, BQ, BQS);
+process_batch(Batch, Unacked, Acc, MA, BQ, BQS) ->
+ {MA1, BQS1} = publish_batch(Unacked, lists:reverse(Acc), MA, BQ, BQS),
+ process_batch(Batch, MA1, BQ, BQS1).
+
+%% Unacked msgs are published via batch_publish.
+publish_batch(false, Batch, MA, BQ, BQS) ->
+ batch_publish(Batch, MA, BQ, BQS);
+%% Acked msgs are published via batch_publish_delivered.
+publish_batch(true, Batch, MA, BQ, BQS) ->
+ batch_publish_delivered(Batch, MA, BQ, BQS).
+
+
+batch_publish(Batch, MA, BQ, BQS) ->
+ BQS1 = BQ:batch_publish(Batch, none, noflow, BQS),
+ {MA, BQS1}.
+
+batch_publish_delivered(Batch, MA, BQ, BQS) ->
+ {AckTags, BQS1} = BQ:batch_publish_delivered(Batch, none, noflow, BQS),
+ MA1 = BQ:zip_msgs_and_acks(Batch, AckTags, MA, BQS1),
+ {MA1, BQS1}.
+
+props(Props) ->
+ Props#message_properties{needs_confirming = false}.
diff --git a/deps/rabbit/src/rabbit_mnesia.erl b/deps/rabbit/src/rabbit_mnesia.erl
new file mode 100644
index 0000000000..070c6a8205
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mnesia.erl
@@ -0,0 +1,1117 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Cluster formation and Mnesia schema management for RabbitMQ nodes:
+%% joining/leaving a cluster, resetting a node, querying cluster
+%% status, and keeping the local database consistent with its peers.
+-module(rabbit_mnesia).
+
+-export([%% Main interface
+         init/0,
+         join_cluster/2,
+         reset/0,
+         force_reset/0,
+         update_cluster_nodes/1,
+         change_cluster_node_type/1,
+         forget_cluster_node/2,
+         force_load_next_boot/0,
+
+         %% Various queries to get the status of the db
+         status/0,
+         is_clustered/0,
+         on_running_node/1,
+         is_process_alive/1,
+         is_registered_process_alive/1,
+         cluster_nodes/1,
+         node_type/0,
+         dir/0,
+         cluster_status_from_mnesia/0,
+
+         %% Operations on the db and utils, mainly used in `rabbit_upgrade' and `rabbit'
+         init_db_unchecked/2,
+         copy_db/1,
+         check_cluster_consistency/0,
+         ensure_mnesia_dir/0,
+
+         %% Hooks used in `rabbit_node_monitor'
+         on_node_up/1,
+         on_node_down/1,
+
+         %% Helpers for diagnostics commands
+         schema_info/1
+        ]).
+
+%% Used internally in rpc calls
+-export([node_info/0, remove_node_if_mnesia_running/1]).
+
+%% Test-only exports.
+-ifdef(TEST).
+-compile(export_all).
+-export([init_with_lock/3]).
+-endif.
+
+%%----------------------------------------------------------------------------
+
+-export_type([node_type/0, cluster_status/0]).
+
+%% disc nodes keep a durable schema copy on disk; ram nodes hold
+%% tables in memory only and catch up from disc peers.
+-type node_type() :: disc | ram.
+%% {AllNodes, DiscNodes, RunningNodes}
+-type cluster_status() :: {[node()], [node()], [node()]}.
+
+%%----------------------------------------------------------------------------
+%% Main interface
+%%----------------------------------------------------------------------------
+
+-spec init() -> 'ok'.
+
+%% Boot-time entry point. A virgin node (empty data directory) runs
+%% peer discovery to find a cluster to join or initialises from
+%% scratch; an already-initialised node (re)initialises its local
+%% database from the known cluster membership.
+init() ->
+    ensure_mnesia_running(),
+    ensure_mnesia_dir(),
+    case is_virgin_node() of
+        true ->
+            rabbit_log:info("Node database directory at ~ts is empty. "
+                            "Assuming we need to join an existing cluster or initialise from scratch...~n",
+                            [dir()]),
+            rabbit_peer_discovery:log_configured_backend(),
+            rabbit_peer_discovery:maybe_init(),
+            init_with_lock();
+        false ->
+            NodeType = node_type(),
+            init_db_and_upgrade(cluster_nodes(all), NodeType,
+                                NodeType =:= ram, _Retry = true),
+            rabbit_peer_discovery:maybe_init(),
+            rabbit_peer_discovery:maybe_register()
+    end,
+    %% We intuitively expect the global name server to be synced when
+    %% Mnesia is up. In fact that's not guaranteed to be the case -
+    %% let's make it so.
+    ok = rabbit_node_monitor:global_sync(),
+    ok.
+
+%% Run peer discovery under the backend's startup lock (if supported)
+%% so concurrently booting nodes don't race to form separate clusters.
+init_with_lock() ->
+    {Retries, Timeout} = rabbit_peer_discovery:locking_retry_timeout(),
+    init_with_lock(Retries, Timeout, fun run_peer_discovery/0).
+
+%% Out of lock-acquisition retries: either proceed without the lock or
+%% give up, depending on the configured failure mode.
+init_with_lock(0, _, RunPeerDiscovery) ->
+    case rabbit_peer_discovery:lock_acquisition_failure_mode() of
+        ignore ->
+            rabbit_log:warning("Could not acquire a peer discovery lock, out of retries", []),
+            RunPeerDiscovery(),
+            rabbit_peer_discovery:maybe_register();
+        fail ->
+            exit(cannot_acquire_startup_lock)
+    end;
+init_with_lock(Retries, Timeout, RunPeerDiscovery) ->
+    LockResult = rabbit_peer_discovery:lock(),
+    rabbit_log:debug("rabbit_peer_discovery:lock returned ~p", [LockResult]),
+    case LockResult of
+        not_supported ->
+            rabbit_log:info("Peer discovery backend does not support locking, falling back to randomized delay"),
+            %% See rabbitmq/rabbitmq-server#1202 for details.
+            rabbit_peer_discovery:maybe_inject_randomized_delay(),
+            RunPeerDiscovery(),
+            rabbit_peer_discovery:maybe_register();
+        {error, _Reason} ->
+            %% Lock is held by someone else: wait and retry.
+            timer:sleep(Timeout),
+            init_with_lock(Retries - 1, Timeout, RunPeerDiscovery);
+        {ok, Data} ->
+            try
+                RunPeerDiscovery(),
+                rabbit_peer_discovery:maybe_register()
+            after
+                rabbit_peer_discovery:unlock(Data)
+            end
+    end.
+
+-spec run_peer_discovery() -> ok | {[node()], node_type()}.
+%% Ask the configured peer discovery backend for cluster members and
+%% either join one of them or start standalone, retrying on errors.
+run_peer_discovery() ->
+    {RetriesLeft, DelayInterval} = rabbit_peer_discovery:discovery_retries(),
+    run_peer_discovery_with_retries(RetriesLeft, DelayInterval).
+
+-spec run_peer_discovery_with_retries(non_neg_integer(), non_neg_integer()) -> ok | {[node()], node_type()}.
+run_peer_discovery_with_retries(0, _DelayInterval) ->
+    ok;
+run_peer_discovery_with_retries(RetriesLeft, DelayInterval) ->
+    %% Collects list elements that are not atoms (i.e. not valid node
+    %% names) so they can all be reported together.
+    FindBadNodeNames = fun
+        (Name, BadNames) when is_atom(Name) -> BadNames;
+        (Name, BadNames)                    -> [Name | BadNames]
+    end,
+    {DiscoveredNodes0, NodeType} =
+        case rabbit_peer_discovery:discover_cluster_nodes() of
+            {error, Reason} ->
+                RetriesLeft1 = RetriesLeft - 1,
+                rabbit_log:error("Peer discovery returned an error: ~p. Will retry after a delay of ~b ms, ~b retries left...",
+                                 [Reason, DelayInterval, RetriesLeft1]),
+                timer:sleep(DelayInterval),
+                run_peer_discovery_with_retries(RetriesLeft1, DelayInterval);
+            {ok, {Nodes, Type} = Config}
+              when is_list(Nodes) andalso (Type == disc orelse Type == disk orelse Type == ram) ->
+                case lists:foldr(FindBadNodeNames, [], Nodes) of
+                    []       -> Config;
+                    BadNames -> e({invalid_cluster_node_names, BadNames})
+                end;
+            {ok, {_, BadType}} when BadType /= disc andalso BadType /= ram ->
+                e({invalid_cluster_node_type, BadType});
+            {ok, _} ->
+                e(invalid_cluster_nodes_conf)
+        end,
+    DiscoveredNodes = lists:usort(DiscoveredNodes0),
+    rabbit_log:info("All discovered existing cluster peers: ~s~n",
+                    [rabbit_peer_discovery:format_discovered_nodes(DiscoveredNodes)]),
+    Peers = nodes_excl_me(DiscoveredNodes),
+    case Peers of
+        [] ->
+            rabbit_log:info("Discovered no peer nodes to cluster with. "
+                            "Some discovery backends can filter nodes out based on a readiness criteria. "
+                            "Enabling debug logging might help troubleshoot."),
+            init_db_and_upgrade([node()], disc, false, _Retry = true);
+        _  ->
+            rabbit_log:info("Peer nodes we can cluster with: ~s~n",
+                            [rabbit_peer_discovery:format_discovered_nodes(Peers)]),
+            join_discovered_peers(Peers, NodeType)
+    end.
+
+%% Attempts to join discovered,
+%% reachable and compatible (in terms of Mnesia internal protocol version and such)
+%% cluster peers in order.
+join_discovered_peers(TryNodes, NodeType) ->
+    {RetriesLeft, DelayInterval} = rabbit_peer_discovery:discovery_retries(),
+    join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft, DelayInterval).
+
+%% No reachable peer after all retries: start as a standalone node.
+join_discovered_peers_with_retries(TryNodes, _NodeType, 0, _DelayInterval) ->
+    rabbit_log:warning(
+      "Could not successfully contact any node of: ~s (as in Erlang distribution). "
+      "Starting as a blank standalone node...~n",
+      [string:join(lists:map(fun atom_to_list/1, TryNodes), ",")]),
+    init_db_and_upgrade([node()], disc, false, _Retry = true);
+join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft, DelayInterval) ->
+    case find_reachable_peer_to_cluster_with(nodes_excl_me(TryNodes)) of
+        {ok, Node} ->
+            rabbit_log:info("Node '~s' selected for auto-clustering~n", [Node]),
+            {ok, {_, DiscNodes, _}} = discover_cluster0(Node),
+            init_db_and_upgrade(DiscNodes, NodeType, true, _Retry = true),
+            rabbit_connection_tracking:boot(),
+            rabbit_node_monitor:notify_joined_cluster();
+        none ->
+            RetriesLeft1 = RetriesLeft - 1,
+            rabbit_log:error("Trying to join discovered peers failed. Will retry after a delay of ~b ms, ~b retries left...",
+                             [DelayInterval, RetriesLeft1]),
+            timer:sleep(DelayInterval),
+            join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft1, DelayInterval)
+    end.
+
+%% Make the node join a cluster. The node will be reset automatically
+%% before we actually cluster it. The nodes provided will be used to
+%% find out about the nodes in the cluster.
+%%
+%% This function will fail if:
+%%
+%%   * The node is currently the only disc node of its cluster
+%%   * We can't connect to any of the nodes provided
+%%   * The node is currently already clustered with the cluster of the nodes
+%%     provided
+%%
+%% Note that we make no attempt to verify that the nodes provided are
+%% all in the same cluster, we simply pick the first online node and
+%% we cluster to its cluster.
+
+-spec join_cluster(node(), node_type())
+                  -> ok | {ok, already_member} | {error, {inconsistent_cluster, string()}}.
+
+join_cluster(DiscoveryNode, NodeType) ->
+    ensure_mnesia_not_running(),
+    ensure_mnesia_dir(),
+    case is_only_clustered_disc_node() of
+        true  -> e(clustering_only_disc_node);
+        false -> ok
+    end,
+    %% Learn the full member list from the discovery node.
+    {ClusterNodes, _, _} = discover_cluster([DiscoveryNode]),
+    case me_in_nodes(ClusterNodes) of
+        false ->
+            case check_cluster_consistency(DiscoveryNode, false) of
+                {ok, _} ->
+                    %% reset the node. this simplifies things and it
+                    %% will be needed in this case - we're joining a new
+                    %% cluster with new nodes which are not in synch
+                    %% with the current node. It also lifts the burden
+                    %% of resetting the node from the user.
+                    reset_gracefully(),
+
+                    %% Join the cluster
+                    rabbit_log:info("Clustering with ~p as ~p node~n",
+                                    [ClusterNodes, NodeType]),
+                    ok = init_db_with_mnesia(ClusterNodes, NodeType,
+                                             true, true, _Retry = true),
+                    rabbit_connection_tracking:boot(),
+                    rabbit_node_monitor:notify_joined_cluster(),
+                    ok;
+                {error, Reason} ->
+                    {error, Reason}
+            end;
+        true ->
+            %% DiscoveryNode thinks that we are part of a cluster, but
+            %% do we think so ourselves?
+            case are_we_clustered_with(DiscoveryNode) of
+                true ->
+                    rabbit_log:info("Asked to join a cluster but already a member of it: ~p~n", [ClusterNodes]),
+                    {ok, already_member};
+                false ->
+                    Msg = format_inconsistent_cluster_message(DiscoveryNode, node()),
+                    rabbit_log:error(Msg),
+                    {error, {inconsistent_cluster, Msg}}
+            end
+    end.
+
+%% return node to its virgin state, where it is not member of any
+%% cluster, has no cluster configuration, no local database, and no
+%% persisted messages
+
+-spec reset() -> 'ok'.
+
+reset() ->
+    ensure_mnesia_not_running(),
+    rabbit_log:info("Resetting Rabbit~n", []),
+    reset_gracefully().
+
+-spec force_reset() -> 'ok'.
+
+%% Like reset/0 but skips talking to the cluster entirely and just
+%% wipes local state; useful when peers are unreachable.
+force_reset() ->
+    ensure_mnesia_not_running(),
+    rabbit_log:info("Resetting Rabbit forcefully~n", []),
+    wipe().
+
+%% Leave the cluster politely (peers remove us), then wipe local data.
+reset_gracefully() ->
+    AllNodes = cluster_nodes(all),
+    %% Reconnecting so that we will get an up to date nodes. We don't
+    %% need to check for consistency because we are resetting.
+    %% Force=true here so that reset still works when clustered with a
+    %% node which is down.
+    init_db_with_mnesia(AllNodes, node_type(), false, false, _Retry = false),
+    case is_only_clustered_disc_node() of
+        true  -> e(resetting_only_disc_node);
+        false -> ok
+    end,
+    leave_cluster(),
+    rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), cannot_delete_schema),
+    wipe().
+
+wipe() ->
+    %% We need to make sure that we don't end up in a distributed
+    %% Erlang system with nodes while not being in an Mnesia cluster
+    %% with them. We don't handle that well.
+    [erlang:disconnect_node(N) || N <- cluster_nodes(all)],
+    %% remove persisted messages and any other garbage we find
+    ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")),
+    ok = rabbit_node_monitor:reset_cluster_status(),
+    ok.
+
+-spec change_cluster_node_type(node_type()) -> 'ok'.
+
+%% Switch this node between disc and ram by resetting it and
+%% re-joining the cluster via a currently-online peer as the new type.
+change_cluster_node_type(Type) ->
+    ensure_mnesia_not_running(),
+    ensure_mnesia_dir(),
+    case is_clustered() of
+        false -> e(not_clustered);
+        true  -> ok
+    end,
+    {_, _, RunningNodes} = discover_cluster(cluster_nodes(all)),
+    %% We might still be marked as running by a remote node since the
+    %% information of us going down might not have propagated yet.
+    Node = case RunningNodes -- [node()] of
+               []        -> e(no_online_cluster_nodes);
+               [Node0|_] -> Node0
+           end,
+    ok = reset(),
+    ok = join_cluster(Node, Type).
+
+-spec update_cluster_nodes(node()) -> 'ok'.
+
+%% Refresh our idea of the cluster membership from DiscoveryNode and
+%% resync the local schema from the (still online) peers.
+update_cluster_nodes(DiscoveryNode) ->
+    ensure_mnesia_not_running(),
+    ensure_mnesia_dir(),
+    Status = {AllNodes, _, _} = discover_cluster([DiscoveryNode]),
+    case me_in_nodes(AllNodes) of
+        true ->
+            %% As in `check_consistency/0', we can safely delete the
+            %% schema here, since it'll be replicated from the other
+            %% nodes
+            mnesia:delete_schema([node()]),
+            rabbit_node_monitor:write_cluster_status(Status),
+            rabbit_log:info("Updating cluster nodes from ~p~n",
+                            [DiscoveryNode]),
+            init_db_with_mnesia(AllNodes, node_type(), true, true, _Retry = false);
+        false ->
+            e(inconsistent_cluster)
+    end,
+    ok.
+
+%% We proceed like this: try to remove the node locally. If the node
+%% is offline, we remove the node if:
+%%   * This node is a disc node
+%%   * All other nodes are offline
+%%   * This node was, at the best of our knowledge (see comment below)
+%%     the last or second to last after the node we're removing to go
+%%     down
+
+-spec forget_cluster_node(node(), boolean()) -> 'ok'.
+
+forget_cluster_node(Node, RemoveWhenOffline) ->
+    forget_cluster_node(Node, RemoveWhenOffline, true).
+
+forget_cluster_node(Node, RemoveWhenOffline, EmitNodeDeletedEvent) ->
+    case lists:member(Node, cluster_nodes(all)) of
+        true  -> ok;
+        false -> e(not_a_cluster_node)
+    end,
+    %% The RemoveWhenOffline flag is only valid while Mnesia is down
+    %% on this node, and vice versa.
+    case {RemoveWhenOffline, is_running()} of
+        {true,  false} -> remove_node_offline_node(Node);
+        {true,   true} -> e(online_node_offline_flag);
+        {false, false} -> e(offline_node_no_offline_flag);
+        {false,  true} -> rabbit_log:info(
+                            "Removing node ~p from cluster~n", [Node]),
+                          case remove_node_if_mnesia_running(Node) of
+                              ok when EmitNodeDeletedEvent ->
+                                  rabbit_event:notify(node_deleted, [{node, Node}]),
+                                  ok;
+                              ok               -> ok;
+                              {error, _} = Err -> throw(Err)
+                          end
+    end.
+
+remove_node_offline_node(Node) ->
+    %% Here `mnesia:system_info(running_db_nodes)' will RPC, but that's what we
+    %% want - we need to know the running nodes *now*. If the current node is a
+    %% RAM node it will return bogus results, but we don't care since we only do
+    %% this operation from disc nodes.
+    case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of
+        {[], disc} ->
+            start_mnesia(),
+            try
+                %% What we want to do here is replace the last node to
+                %% go down with the current node. The way we do this
+                %% is by force loading the table, and making sure that
+                %% they are loaded.
+                rabbit_table:force_load(),
+                rabbit_table:wait_for_replicated(_Retry = false),
+                %% We skip the 'node_deleted' event because the
+                %% application is stopped and thus, rabbit_event is not
+                %% enabled.
+                forget_cluster_node(Node, false, false),
+                force_load_next_boot()
+            after
+                stop_mnesia()
+            end;
+        {_, _} ->
+            e(removing_node_from_offline_node)
+    end.
+
+%%----------------------------------------------------------------------------
+%% Queries
+%%----------------------------------------------------------------------------
+
+-spec status() -> [{'nodes',         [{node_type(), [node()]}]} |
+                   {'running_nodes', [node()]} |
+                   {'partitions',    [{node(), [node()]}]}].
+
+%% Proplist describing cluster membership; running-node details are
+%% only included while Mnesia is actually up.
+status() ->
+    IfNonEmpty = fun (_, [])       -> [];
+                     (Type, Nodes) -> [{Type, Nodes}]
+                 end,
+    [{nodes, (IfNonEmpty(disc, cluster_nodes(disc)) ++
+              IfNonEmpty(ram, cluster_nodes(ram)))}] ++
+    case is_running() of
+        true  -> RunningNodes = cluster_nodes(running),
+                 [{running_nodes, RunningNodes},
+                  {cluster_name,  rabbit_nodes:cluster_name()},
+                  {partitions,    mnesia_partitions(RunningNodes)}];
+        false -> []
+    end.
+
+%% Keep only replies that report a non-empty partition list.
+mnesia_partitions(Nodes) ->
+    Replies = rabbit_node_monitor:partitions(Nodes),
+    [Reply || Reply = {_, R} <- Replies, R =/= []].
+
+is_running() -> mnesia:system_info(is_running) =:= yes.
+
+-spec is_clustered() -> boolean().
+
+%% A lone node (or one only clustered with itself) does not count.
+is_clustered() -> AllNodes = cluster_nodes(all),
+                  AllNodes =/= [] andalso AllNodes =/= [node()].
+
+-spec on_running_node(pid()) -> boolean().
+
+on_running_node(Pid) -> lists:member(node(Pid), cluster_nodes(running)).
+
+%% This requires the process be in the same running cluster as us
+%% (i.e. not partitioned or some random node).
+%%
+%% See also rabbit_misc:is_process_alive/1 which does not.
+
+-spec is_process_alive(pid() | {atom(), node()}) -> boolean().
+
+is_process_alive(Pid) when is_pid(Pid) ->
+    on_running_node(Pid) andalso
+    rpc:call(node(Pid), erlang, is_process_alive, [Pid]) =:= true;
+is_process_alive({Name, Node}) ->
+    lists:member(Node, cluster_nodes(running)) andalso
+    rpc:call(Node, rabbit_mnesia, is_registered_process_alive, [Name]) =:= true.
+
+-spec is_registered_process_alive(atom()) -> boolean().
+
+is_registered_process_alive(Name) ->
+    is_pid(whereis(Name)).
+
+-spec cluster_nodes('all' | 'disc' | 'ram' | 'running') -> [node()].
+
+cluster_nodes(WhichNodes) -> cluster_status(WhichNodes).
+
+%% This function is the actual source of information, since it gets
+%% the data from mnesia. Obviously it'll work only when mnesia is
+%% running.
+
+-spec cluster_status_from_mnesia() -> rabbit_types:ok_or_error2(
+                                        cluster_status(), any()).
+
+cluster_status_from_mnesia() ->
+    case is_running() of
+        false ->
+            {error, mnesia_not_running};
+        true ->
+            %% If the tables are not present, it means that
+            %% `init_db/3' hasn't been run yet. In other words, either
+            %% we are a virgin node or a restarted RAM node. In both
+            %% cases we're not interested in what mnesia has to say.
+            NodeType = case mnesia:system_info(use_dir) of
+                           true  -> disc;
+                           false -> ram
+                       end,
+            case rabbit_table:is_present() of
+                true  -> AllNodes = mnesia:system_info(db_nodes),
+                         DiscCopies = mnesia:table_info(schema, disc_copies),
+                         DiscNodes = case NodeType of
+                                         disc -> nodes_incl_me(DiscCopies);
+                                         ram  -> DiscCopies
+                                     end,
+                         %% `mnesia:system_info(running_db_nodes)' is safe since
+                         %% we know that mnesia is running
+                         RunningNodes = mnesia:system_info(running_db_nodes),
+                         {ok, {AllNodes, DiscNodes, RunningNodes}};
+                false -> {error, tables_not_present}
+            end
+    end.
+
+%% Prefer live information from Mnesia; fall back to the cluster
+%% status file written by rabbit_node_monitor when Mnesia is down.
+cluster_status(WhichNodes) ->
+    {AllNodes, DiscNodes, RunningNodes} = Nodes =
+        case cluster_status_from_mnesia() of
+            {ok, Nodes0} ->
+                Nodes0;
+            {error, _Reason} ->
+                {AllNodes0, DiscNodes0, RunningNodes0} =
+                    rabbit_node_monitor:read_cluster_status(),
+                %% The cluster status file records the status when the node is
+                %% online, but we know for sure that the node is offline now, so
+                %% we can remove it from the list of running nodes.
+                {AllNodes0, DiscNodes0, nodes_excl_me(RunningNodes0)}
+        end,
+    case WhichNodes of
+        status  -> Nodes;
+        all     -> AllNodes;
+        disc    -> DiscNodes;
+        ram     -> AllNodes -- DiscNodes;
+        running -> RunningNodes
+    end.
+
+%% Version/protocol/status tuple exchanged over RPC between nodes; see
+%% remote_node_info/1 on the receiving side.
+node_info() ->
+    {rabbit_misc:otp_release(), rabbit_misc:version(),
+     mnesia:system_info(protocol_version),
+     cluster_status_from_mnesia()}.
+
+-spec node_type() -> node_type().
+
+node_type() ->
+    {_AllNodes, DiscNodes, _RunningNodes} =
+        rabbit_node_monitor:read_cluster_status(),
+    case DiscNodes =:= [] orelse me_in_nodes(DiscNodes) of
+        true  -> disc;
+        false -> ram
+    end.
+
+-spec dir() -> file:filename().
+
+dir() -> mnesia:system_info(directory).
+
+%%----------------------------------------------------------------------------
+%% Operations on the db
+%%----------------------------------------------------------------------------
+
+%% Adds the provided nodes to the mnesia cluster, creating a new
+%% schema if there is the need to and catching up if there are other
+%% nodes in the cluster already. It also updates the cluster status
+%% file.
+init_db(ClusterNodes, NodeType, CheckOtherNodes) ->
+    NodeIsVirgin = is_virgin_node(),
+    rabbit_log:debug("Does data directory looks like that of a blank (uninitialised) node? ~p", [NodeIsVirgin]),
+    Nodes = change_extra_db_nodes(ClusterNodes, CheckOtherNodes),
+    %% Note that we use `system_info' here and not the cluster status
+    %% since when we start rabbit for the first time the cluster
+    %% status will say we are a disc node but the tables won't be
+    %% present yet.
+    WasDiscNode = mnesia:system_info(use_dir),
+    case {Nodes, WasDiscNode, NodeType} of
+        {[], _, ram} ->
+            %% Standalone ram node, we don't want that
+            throw({error, cannot_create_standalone_ram_node});
+        {[], false, disc} ->
+            %% RAM -> disc, starting from scratch
+            ok = create_schema();
+        {[], true, disc} ->
+            %% First disc node up
+            maybe_force_load(),
+            ok;
+        {[_ | _], _, _} ->
+            %% Subsequent node in cluster, catch up
+            maybe_force_load(),
+            ok = rabbit_table:wait_for_replicated(_Retry = true),
+            ok = rabbit_table:ensure_local_copies(NodeType)
+    end,
+    ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin),
+    ensure_schema_integrity(),
+    rabbit_node_monitor:update_cluster_status(),
+    ok.
+
+-spec init_db_unchecked([node()], node_type()) -> 'ok'.
+
+%% init_db/3 without verifying that the other nodes are reachable.
+init_db_unchecked(ClusterNodes, NodeType) ->
+    init_db(ClusterNodes, NodeType, false).
+
+%% init_db/3 followed by any pending local schema upgrades.
+init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes, Retry) ->
+    ok = init_db(ClusterNodes, NodeType, CheckOtherNodes),
+    ok = case rabbit_upgrade:maybe_upgrade_local() of
+             ok                    -> ok;
+             starting_from_scratch -> rabbit_version:record_desired();
+             version_not_available -> schema_ok_or_move()
+         end,
+    %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget
+    %% about the cluster
+    case NodeType of
+        ram  -> start_mnesia(),
+                change_extra_db_nodes(ClusterNodes, false);
+        disc -> ok
+    end,
+    %% ...and all nodes will need to wait for tables
+    rabbit_table:wait_for_replicated(Retry),
+    ok.
+
+%% Wrap init_db_and_upgrade/4 in a start/stop of the mnesia
+%% application (optionally checking cluster consistency first).
+init_db_with_mnesia(ClusterNodes, NodeType,
+                    CheckOtherNodes, CheckConsistency, Retry) ->
+    start_mnesia(CheckConsistency),
+    try
+        init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes, Retry)
+    after
+        stop_mnesia()
+    end.
+
+-spec ensure_mnesia_dir() -> 'ok'.
+
+%% Create the Mnesia data directory if it does not exist yet.
+ensure_mnesia_dir() ->
+    MnesiaDir = dir() ++ "/",
+    case filelib:ensure_dir(MnesiaDir) of
+        {error, Reason} ->
+            throw({error, {cannot_create_mnesia_dir, MnesiaDir, Reason}});
+        ok ->
+            ok
+    end.
+
+%% Block (polling once a second via wait_for/1) until Mnesia has
+%% finished starting; throw if it is stopped or stopping.
+ensure_mnesia_running() ->
+    case mnesia:system_info(is_running) of
+        yes ->
+            ok;
+        starting ->
+            wait_for(mnesia_running),
+            ensure_mnesia_running();
+        Reason when Reason =:= no; Reason =:= stopping ->
+            throw({error, mnesia_not_running})
+    end.
+
+%% Mirror image of ensure_mnesia_running/0.
+ensure_mnesia_not_running() ->
+    case mnesia:system_info(is_running) of
+        no ->
+            ok;
+        stopping ->
+            wait_for(mnesia_not_running),
+            ensure_mnesia_not_running();
+        Reason when Reason =:= yes; Reason =:= starting ->
+            throw({error, mnesia_unexpectedly_running})
+    end.
+
+ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin) ->
+    Ret = rabbit_feature_flags:sync_feature_flags_with_cluster(
+            Nodes, NodeIsVirgin),
+    case Ret of
+        ok              -> ok;
+        {error, Reason} -> throw({error, {incompatible_feature_flags, Reason}})
+    end.
+
+ensure_schema_integrity() ->
+    case rabbit_table:check_schema_integrity(_Retry = true) of
+        ok ->
+            ok;
+        {error, Reason} ->
+            throw({error, {schema_integrity_check_failed, Reason}})
+    end.
+
+-spec copy_db(file:filename()) -> rabbit_types:ok_or_error(any()).
+
+%% Copy the whole Mnesia directory (e.g. for backups/upgrades); only
+%% safe while Mnesia is stopped.
+copy_db(Destination) ->
+    ok = ensure_mnesia_not_running(),
+    rabbit_file:recursive_copy(dir(), Destination).
+
+%% Marker file whose presence makes the next boot force-load tables.
+force_load_filename() ->
+    filename:join(dir(), "force_load").
+
+-spec force_load_next_boot() -> 'ok'.
+
+force_load_next_boot() ->
+    rabbit_file:write_file(force_load_filename(), <<"">>).
+
+%% Consume the force-load marker left by force_load_next_boot/0.
+maybe_force_load() ->
+    case rabbit_file:is_file(force_load_filename()) of
+        true  -> rabbit_table:force_load(),
+                 rabbit_file:delete(force_load_filename());
+        false -> ok
+    end.
+
+%% This does not guarantee us much, but it avoids some situations that
+%% will definitely end up badly
+
+-spec check_cluster_consistency() -> 'ok'.
+
+check_cluster_consistency() ->
+    %% We want to find 0 or 1 consistent nodes.
+    case lists:foldl(
+           fun (Node,  {error, _})   -> check_cluster_consistency(Node, true);
+               (_Node, {ok, Status}) -> {ok, Status}
+           end, {error, not_found}, nodes_excl_me(cluster_nodes(all)))
+    of
+        {ok, Status = {RemoteAllNodes, _, _}} ->
+            case ordsets:is_subset(ordsets:from_list(cluster_nodes(all)),
+                                   ordsets:from_list(RemoteAllNodes)) of
+                true  ->
+                    ok;
+                false ->
+                    %% We delete the schema here since we think we are
+                    %% clustered with nodes that are no longer in the
+                    %% cluster and there is no other way to remove
+                    %% them from our schema. On the other hand, we are
+                    %% sure that there is another online node that we
+                    %% can use to sync the tables with. There is a
+                    %% race here: if between this check and the
+                    %% `init_db' invocation the cluster gets
+                    %% disbanded, we're left with a node with no
+                    %% mnesia data that will try to connect to offline
+                    %% nodes.
+                    mnesia:delete_schema([node()])
+            end,
+            rabbit_node_monitor:write_cluster_status(Status);
+        {error, not_found} ->
+            ok;
+        {error, _} = E ->
+            throw(E)
+    end.
+
+%% Check that Node runs compatible OTP/Rabbit/Mnesia-protocol versions
+%% (and, optionally, that it agrees we belong to its cluster).
+check_cluster_consistency(Node, CheckNodesConsistency) ->
+    case remote_node_info(Node) of
+        {badrpc, _Reason} ->
+            {error, not_found};
+        {_OTP, Rabbit, DelegateModuleHash, _Status} when is_binary(DelegateModuleHash) ->
+            %% when a delegate module .beam file hash is present
+            %% in the tuple, we are dealing with an old version
+            rabbit_version:version_error("Rabbit", rabbit_misc:version(), Rabbit);
+        {_OTP, _Rabbit, _Protocol, {error, _}} ->
+            {error, not_found};
+        {OTP, Rabbit, Protocol, {ok, Status}} when CheckNodesConsistency ->
+            case check_consistency(Node, OTP, Rabbit, Protocol, Status) of
+                {error, _} = E -> E;
+                {ok, Res}      -> {ok, Res}
+            end;
+        {OTP, Rabbit, Protocol, {ok, Status}} ->
+            case check_consistency(Node, OTP, Rabbit, Protocol) of
+                {error, _} = E -> E;
+                ok             -> {ok, Status}
+            end
+    end.
+
+%% Normalise node_info/0 replies from current and older releases into
+%% a 4-tuple.
+remote_node_info(Node) ->
+    case rpc:call(Node, rabbit_mnesia, node_info, []) of
+        {badrpc, _} = Error   -> Error;
+        %% RabbitMQ prior to 3.6.2
+        {OTP, Rabbit, Status} -> {OTP, Rabbit, unsupported, Status};
+        %% RabbitMQ 3.6.2 or later
+        {OTP, Rabbit, Protocol, Status} -> {OTP, Rabbit, Protocol, Status}
+    end.
+
+
+%%--------------------------------------------------------------------
+%% Hooks for `rabbit_node_monitor'
+%%--------------------------------------------------------------------
+
+-spec on_node_up(node()) -> 'ok'.
+
+%% Log when the first disc node (re)appears in the running cluster.
+on_node_up(Node) ->
+    case running_disc_nodes() of
+        [Node] -> rabbit_log:info("cluster contains disc nodes again~n");
+        _      -> ok
+    end.
+
+-spec on_node_down(node()) -> 'ok'.
+
+%% Log when the last running disc node disappears.
+on_node_down(_Node) ->
+    case running_disc_nodes() of
+        [] -> rabbit_log:info("only running disc node went down~n");
+        _  -> ok
+    end.
+
+%% Disc nodes that are currently running, as an ordered list.
+running_disc_nodes() ->
+    {_AllNodes, DiscNodes, RunningNodes} = cluster_status(status),
+    ordsets:to_list(ordsets:intersection(ordsets:from_list(DiscNodes),
+                                         ordsets:from_list(RunningNodes))).
+
+%%--------------------------------------------------------------------
+%% Helpers for diagnostics commands
+%%--------------------------------------------------------------------
+
+%% Return the requested Items of mnesia:table_info/2 for every table.
+schema_info(Items) ->
+    Tables = mnesia:system_info(tables),
+    [info(Table, Items) || Table <- Tables].
+
+info(Table, Items) ->
+    All = [{name, Table} | mnesia:table_info(Table, all)],
+    [{Item, proplists:get_value(Item, All)} || Item <- Items].
+
+%%--------------------------------------------------------------------
+%% Internal helpers
+%%--------------------------------------------------------------------
+
+%% Ask each candidate in turn for its cluster status, returning the
+%% first successful reply; throw if none of them answers.
+discover_cluster(Nodes) ->
+    case lists:foldl(fun (_,    {ok, Res}) -> {ok, Res};
+                         (Node, _)         -> discover_cluster0(Node)
+                     end, {error, no_nodes_provided}, Nodes) of
+        {ok, Res}        -> Res;
+        {error, E}       -> throw({error, E});
+        {badrpc, Reason} -> throw({badrpc_multi, Reason, Nodes})
+    end.
+
+discover_cluster0(Node) when Node == node() ->
+    {error, cannot_cluster_node_with_itself};
+discover_cluster0(Node) ->
+    rpc:call(Node, rabbit_mnesia, cluster_status_from_mnesia, []).
+
+%% If the schema is intact keep it; otherwise move the database out of
+%% the way and recreate a fresh schema from scratch.
+schema_ok_or_move() ->
+    case rabbit_table:check_schema_integrity(_Retry = false) of
+        ok ->
+            ok;
+        {error, Reason} ->
+            %% NB: we cannot use rabbit_log here since it may not have been
+            %% started yet
+            rabbit_log:warning("schema integrity check failed: ~p~n"
+                               "moving database to backup location "
+                               "and recreating schema from scratch~n",
+                               [Reason]),
+            ok = move_db(),
+            ok = create_schema()
+    end.
+
+%% We only care about disc nodes since ram nodes are supposed to catch
+%% up only
+%%
+%% Create a brand new schema on the local node, build all of
+%% RabbitMQ's tables in it and record the current schema version.
+%% Mnesia must be stopped to create a schema and restarted to create
+%% the tables. Throws on any failure.
+create_schema() ->
+    stop_mnesia(),
+    rabbit_log:debug("Will bootstrap a schema database..."),
+    rabbit_misc:ensure_ok(mnesia:create_schema([node()]), cannot_create_schema),
+    %% Fixed typo in debug message: "Bootstraped" -> "Bootstrapped".
+    rabbit_log:debug("Bootstrapped a schema database successfully"),
+    start_mnesia(),
+
+    rabbit_log:debug("Will create schema database tables"),
+    ok = rabbit_table:create(),
+    rabbit_log:debug("Created schema database tables successfully"),
+    rabbit_log:debug("Will check schema database integrity..."),
+    ensure_schema_integrity(),
+    rabbit_log:debug("Schema database schema integrity check passed"),
+    ok = rabbit_version:record_desired().
+
+%% Rename the current Mnesia directory to a timestamped backup
+%% location and recreate an empty one in its place.
+move_db() ->
+    stop_mnesia(),
+    MnesiaDir = filename:dirname(dir() ++ "/"),
+    {{Year, Month, Day}, {Hour, Minute, Second}} = erlang:universaltime(),
+    BackupDir = rabbit_misc:format(
+                  "~s_~w~2..0w~2..0w~2..0w~2..0w~2..0w",
+                  [MnesiaDir, Year, Month, Day, Hour, Minute, Second]),
+    case file:rename(MnesiaDir, BackupDir) of
+        ok ->
+            %% NB: we cannot use rabbit_log here since it may not have
+            %% been started yet
+            rabbit_log:warning("moved database from ~s to ~s~n",
+                               [MnesiaDir, BackupDir]),
+            ok;
+        {error, Reason} -> throw({error, {cannot_backup_mnesia,
+                                          MnesiaDir, BackupDir, Reason}})
+    end,
+    ensure_mnesia_dir(),
+    start_mnesia(),
+    ok.
+
+remove_node_if_mnesia_running(Node) ->
+    case is_running() of
+        false ->
+            {error, mnesia_not_running};
+        true ->
+            %% Deleting the schema copy of the node will result in
+            %% the node being removed from the cluster, with that
+            %% change being propagated to all nodes
+            case mnesia:del_table_copy(schema, Node) of
+                {atomic, ok} ->
+                    rabbit_amqqueue:forget_all_durable(Node),
+                    rabbit_node_monitor:notify_left_cluster(Node),
+                    ok;
+                {aborted, Reason} ->
+                    {error, {failed_to_remove_node, Node, Reason}}
+            end
+    end.
+
+%% Ask every peer to remove us from the cluster; succeeds if at least
+%% one running peer performed the removal.
+leave_cluster() ->
+    case nodes_excl_me(cluster_nodes(all)) of
+        []       -> ok;
+        AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of
+                        true  -> ok;
+                        false -> e(no_running_cluster_nodes)
+                    end
+    end.
+
+leave_cluster(Node) ->
+    case rpc:call(Node,
+                  rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of
+        ok                          -> true;
+        {error, mnesia_not_running} -> false;
+        {error, Reason}             -> throw({error, Reason});
+        {badrpc, nodedown}          -> false
+    end.
+
+%% Crude wait: log the condition and sleep; the caller loops.
+wait_for(Condition) ->
+    rabbit_log:info("Waiting for ~p...~n", [Condition]),
+    timer:sleep(1000).
+
+start_mnesia(CheckConsistency) ->
+    case CheckConsistency of
+        true  -> check_cluster_consistency();
+        false -> ok
+    end,
+    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+    ensure_mnesia_running().
+
+start_mnesia() ->
+    start_mnesia(true).
+
+stop_mnesia() ->
+    stopped = mnesia:stop(),
+    ensure_mnesia_not_running().
+
+%% Tell Mnesia about the other cluster nodes. With CheckOtherNodes we
+%% insist on actually connecting to at least one of them.
+change_extra_db_nodes(ClusterNodes0, CheckOtherNodes) ->
+    ClusterNodes = nodes_excl_me(ClusterNodes0),
+    case {mnesia:change_config(extra_db_nodes, ClusterNodes), ClusterNodes} of
+        {{ok, []}, [_|_]} when CheckOtherNodes ->
+            throw({error, {failed_to_cluster_with, ClusterNodes,
+                           "Mnesia could not connect to any nodes."}});
+        {{ok, Nodes}, _} ->
+            Nodes
+    end.
+
+check_consistency(Node, OTP, Rabbit, ProtocolVersion) ->
+ rabbit_misc:sequence_error(
+ [check_mnesia_or_otp_consistency(Node, ProtocolVersion, OTP),
+ check_rabbit_consistency(Node, Rabbit)]).
+
+check_consistency(Node, OTP, Rabbit, ProtocolVersion, Status) ->
+ rabbit_misc:sequence_error(
+ [check_mnesia_or_otp_consistency(Node, ProtocolVersion, OTP),
+ check_rabbit_consistency(Node, Rabbit),
+ check_nodes_consistency(Node, Status)]).
+
+check_nodes_consistency(Node, RemoteStatus = {RemoteAllNodes, _, _}) ->
+ case me_in_nodes(RemoteAllNodes) of
+ true ->
+ {ok, RemoteStatus};
+ false ->
+ {error, {inconsistent_cluster,
+ format_inconsistent_cluster_message(node(), Node)}}
+ end.
+
+check_mnesia_or_otp_consistency(_Node, unsupported, OTP) ->
+ rabbit_version:check_otp_consistency(OTP);
+check_mnesia_or_otp_consistency(Node, ProtocolVersion, _) ->
+ check_mnesia_consistency(Node, ProtocolVersion).
+
%% Negotiate the Mnesia replication protocol with Node; an empty
%% negotiation result is turned into an {error, inconsistent_cluster}
%% with both protocol versions in the message.
check_mnesia_consistency(Node, ProtocolVersion) ->
    % If mnesia is running we will just check protocol version
    % If it's not running, we don't want it to join cluster until all checks pass
    % so we start it without `dir` env variable to prevent
    % joining cluster and/or corrupting data
    with_running_or_clean_mnesia(fun() ->
        case negotiate_protocol([Node]) of
            [Node] -> ok;
            [] ->
                LocalVersion = mnesia:system_info(protocol_version),
                {error, {inconsistent_cluster,
                         rabbit_misc:format("Mnesia protocol negotiation failed."
                                            " Local version: ~p."
                                            " Remote version ~p",
                                            [LocalVersion, ProtocolVersion])}}
        end
    end).

%% Thin wrapper over the Mnesia-internal mnesia_monitor module; returns
%% [Node] on success and [] on failure (per the caller's case above).
negotiate_protocol([Node]) ->
    mnesia_monitor:negotiate_protocol([Node]).
+
%% Run Fun() against a running Mnesia. If Mnesia is already up (or
%% starting), just call Fun(). Otherwise start a throwaway, RAM-only
%% Mnesia - with the `dir` env unset so it cannot join the cluster or
%% touch on-disk data - run Fun(), then stop it and restore the saved
%% application env.
%%
%% Fix over the original: the cleanup (stopping mnesia, restoring the
%% `dir` and `schema_location` env values) now runs in an `after`
%% clause, so a crashing Fun() no longer leaves mnesia running
%% disk-less with a clobbered application environment.
with_running_or_clean_mnesia(Fun) ->
    IsMnesiaRunning = case mnesia:system_info(is_running) of
                          yes -> true;
                          no  -> false;
                          stopping ->
                              ensure_mnesia_not_running(),
                              false;
                          starting ->
                              ensure_mnesia_running(),
                              true
                      end,
    case IsMnesiaRunning of
        true  -> Fun();
        false ->
            %% Save current env so we can put everything back afterwards.
            SavedMnesiaDir = dir(),
            application:unset_env(mnesia, dir),
            SchemaLoc = application:get_env(mnesia, schema_location, opt_disc),
            application:set_env(mnesia, schema_location, ram),
            mnesia:start(),
            try
                Fun()
            after
                application:stop(mnesia),
                application:set_env(mnesia, dir, SavedMnesiaDir),
                application:set_env(mnesia, schema_location, SchemaLoc)
            end
    end.
+
%% Check RabbitMQ version compatibility (minor-version equivalence)
%% and feature-flag compatibility against a remote node; first failing
%% check wins via rabbit_misc:sequence_error/1.
check_rabbit_consistency(RemoteNode, RemoteVersion) ->
    rabbit_misc:sequence_error(
      [rabbit_version:check_version_consistency(
         rabbit_misc:version(), RemoteVersion, "Rabbit",
         fun rabbit_misc:version_minor_equivalent/2),
       rabbit_feature_flags:check_node_compatibility(RemoteNode)]).
+
%% This is fairly tricky. We want to know if the node is in the state
%% that a `reset' would leave it in. We cannot simply check if the
%% mnesia tables aren't there because restarted RAM nodes won't have
%% tables while still being non-virgin. What we do instead is to
%% check if the mnesia directory is non existent or empty, with the
%% exception of certain files and directories, which can be there very early
%% on node boot.
is_virgin_node() ->
    case rabbit_file:list_dir(dir()) of
        %% Directory missing or empty: definitely virgin.
        {error, enoent} ->
            true;
        {ok, []} ->
            true;
        {ok, List0} ->
            %% Files written before/while clustering decisions are made;
            %% their presence does not make the node non-virgin. Compare
            %% by basename since list_dir/1 returns bare names.
            IgnoredFiles0 =
            [rabbit_node_monitor:cluster_status_filename(),
             rabbit_node_monitor:running_nodes_filename(),
             rabbit_node_monitor:default_quorum_filename(),
             rabbit_node_monitor:quorum_filename(),
             rabbit_feature_flags:enabled_feature_flags_list_file()],
            IgnoredFiles = [filename:basename(File) || File <- IgnoredFiles0],
            rabbit_log:debug("Files and directories found in node's data directory: ~s, of them to be ignored: ~s",
                             [string:join(lists:usort(List0), ", "), string:join(lists:usort(IgnoredFiles), ", ")]),
            List = List0 -- IgnoredFiles,
            rabbit_log:debug("Files and directories found in node's data directory sans ignored ones: ~s", [string:join(lists:usort(List), ", ")]),
            List =:= []
    end.
+
%% Walk the candidate list and return {ok, Node} for the first peer
%% whose node info is retrievable and whose versions are consistent
%% with ours; 'none' when the list is exhausted. Each failure is logged
%% as a warning and the search continues with the remaining nodes.
find_reachable_peer_to_cluster_with([]) ->
    none;
find_reachable_peer_to_cluster_with([Node | Nodes]) ->
    %% Fail both logs the reason and recurses on the tail, so each
    %% failing clause below can simply return its result.
    Fail = fun (Fmt, Args) ->
                   rabbit_log:warning(
                     "Could not auto-cluster with node ~s: " ++ Fmt, [Node | Args]),
                   find_reachable_peer_to_cluster_with(Nodes)
           end,
    case remote_node_info(Node) of
        {badrpc, _} = Reason ->
            Fail("~p~n", [Reason]);
        %% old delegate hash check
        {_OTP, RMQ, Hash, _} when is_binary(Hash) ->
            Fail("version ~s~n", [RMQ]);
        {_OTP, _RMQ, _Protocol, {error, _} = E} ->
            Fail("~p~n", [E]);
        {OTP, RMQ, Protocol, _} ->
            case check_consistency(Node, OTP, RMQ, Protocol) of
                {error, _} -> Fail("versions ~p~n",
                                   [{OTP, RMQ}]);
                ok -> {ok, Node}
            end
    end.
+
%% True when this node is a disc node, is clustered, and is the sole
%% disc member - i.e. removing or resetting it would leave the cluster
%% with no disc copy.
is_only_clustered_disc_node() ->
    node_type() =:= disc andalso is_clustered() andalso
        cluster_nodes(disc) =:= [node()].

%% True when Node appears in Mnesia's view of our cluster membership.
are_we_clustered_with(Node) ->
    lists:member(Node, mnesia_lib:all_nodes()).

%% Membership test for the local node.
me_in_nodes(Nodes) -> lists:member(node(), Nodes).

%% Nodes with the local node added; deduplicated and sorted.
nodes_incl_me(Nodes) -> lists:usort([node()|Nodes]).

%% Nodes with the local node removed.
nodes_excl_me(Nodes) -> Nodes -- [node()].
+
-spec e(any()) -> no_return().

%% Throw a tagged error together with its operator-facing description.
e(Tag) -> throw({error, {Tag, error_description(Tag)}}).

%% Human-readable explanations for the error tags thrown via e/1;
%% surfaced to operators by rabbitmqctl.
error_description({invalid_cluster_node_names, BadNames}) ->
    "In the 'cluster_nodes' configuration key, the following node names "
        "are invalid: " ++ lists:flatten(io_lib:format("~p", [BadNames]));
error_description({invalid_cluster_node_type, BadType}) ->
    "In the 'cluster_nodes' configuration key, the node type is invalid "
        "(expected 'disc' or 'ram'): " ++
        lists:flatten(io_lib:format("~p", [BadType]));
error_description(invalid_cluster_nodes_conf) ->
    "The 'cluster_nodes' configuration key is invalid, it must be of the "
        "form {[Nodes], Type}, where Nodes is a list of node names and "
        "Type is either 'disc' or 'ram'";
error_description(clustering_only_disc_node) ->
    "You cannot cluster a node if it is the only disc node in its existing "
        " cluster. If new nodes joined while this node was offline, use "
        "'update_cluster_nodes' to add them manually.";
error_description(resetting_only_disc_node) ->
    "You cannot reset a node when it is the only disc node in a cluster. "
        "Please convert another node of the cluster to a disc node first.";
error_description(not_clustered) ->
    "Non-clustered nodes can only be disc nodes.";
error_description(no_online_cluster_nodes) ->
    "Could not find any online cluster nodes. If the cluster has changed, "
        "you can use the 'update_cluster_nodes' command.";
error_description(inconsistent_cluster) ->
    "The nodes provided do not have this node as part of the cluster.";
error_description(not_a_cluster_node) ->
    "The node selected is not in the cluster.";
error_description(online_node_offline_flag) ->
    "You set the --offline flag, which is used to remove nodes remotely from "
        "offline nodes, but this node is online.";
error_description(offline_node_no_offline_flag) ->
    "You are trying to remove a node from an offline node. That is dangerous, "
        "but can be done with the --offline flag. Please consult the manual "
        "for rabbitmqctl for more information.";
error_description(removing_node_from_offline_node) ->
    "To remove a node remotely from an offline node, the node you are removing "
        "from must be a disc node and all the other nodes must be offline.";
error_description(no_running_cluster_nodes) ->
    "You cannot leave a cluster if no online nodes are present.".

%% Message used when two nodes disagree about cluster membership.
format_inconsistent_cluster_message(Thinker, Dissident) ->
    rabbit_misc:format("Node ~p thinks it's clustered "
                       "with node ~p, but ~p disagrees",
                       [Thinker, Dissident, Dissident]).
diff --git a/deps/rabbit/src/rabbit_mnesia_rename.erl b/deps/rabbit/src/rabbit_mnesia_rename.erl
new file mode 100644
index 0000000000..e0d88c0f5e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mnesia_rename.erl
@@ -0,0 +1,276 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mnesia_rename).
+-include("rabbit.hrl").
+
+-export([rename/2]).
+-export([maybe_finish/1]).
+
+-define(CONVERT_TABLES, [schema, rabbit_durable_queue]).
+
+%% Supports renaming the nodes in the Mnesia database. In order to do
+%% this, we take a backup of the database, traverse the backup
+%% changing node names and pids as we go, then restore it.
+%%
+%% That's enough for a standalone node, for clusters the story is more
+%% complex. We can take pairs of nodes From and To, but backing up and
+%% restoring the database changes schema cookies, so if we just do
+%% this on all nodes the cluster will refuse to re-form with
+%% "Incompatible schema cookies.". Therefore we do something similar
+%% to what we do for upgrades - the first node in the cluster to
+%% restart becomes the authority, and other nodes wipe their own
+%% Mnesia state and rejoin. They also need to tell Mnesia the old node
+%% is not coming back.
+%%
+%% If we are renaming nodes one at a time then the running cluster
+%% might not be aware that a rename has taken place, so after we wipe
+%% and rejoin we then update any tables (in practice just
+%% rabbit_durable_queue) which should be aware that we have changed.
+
+%%----------------------------------------------------------------------------
+
%% Rename cluster nodes in the local Mnesia database. Node is this
%% node's name; NodeMapList maps old names to new ones. Mnesia is
%% always stopped again afterwards, even if a step throws.
-spec rename(node(), [{node(), node()}]) -> 'ok'.

rename(Node, NodeMapList) ->
    try
        %% Check everything is correct and figure out what we are
        %% changing from and to.
        {FromNode, ToNode, NodeMap} = prepare(Node, NodeMapList),

        %% We backup and restore Mnesia even if other nodes are
        %% running at the time, and defer the final decision about
        %% whether to use our mutated copy or rejoin the cluster until
        %% we restart. That means we might be mutating our copy of the
        %% database while the cluster is running. *Do not* contact the
        %% cluster while this is happening, we are likely to get
        %% confused.
        application:set_env(kernel, dist_auto_connect, never),

        %% Take a copy we can restore from if we abandon the
        %% rename. We don't restore from the "backup" since restoring
        %% that changes schema cookies and might stop us rejoining the
        %% cluster.
        ok = rabbit_mnesia:copy_db(mnesia_copy_dir()),

        %% And make the actual changes
        become(FromNode),
        take_backup(before_backup_name()),
        convert_backup(NodeMap, before_backup_name(), after_backup_name()),
        %% Persist the pending rename so maybe_finish/1 can complete or
        %% roll it back on the next boot.
        ok = rabbit_file:write_term_file(rename_config_name(),
                                         [{FromNode, ToNode}]),
        convert_config_files(NodeMap),
        become(ToNode),
        restore_backup(after_backup_name()),
        ok
    after
        stop_mnesia()
    end.
+
%% Validate the rename request and work out this node's old and new
%% names. Returns {FromNode, ToNode, NodeMap} where NodeMap is a dict
%% of old -> new names; FromNode =:= ToNode when this node itself is
%% not one of the nodes being renamed.
prepare(Node, NodeMapList) ->
    %% If we have a previous rename and haven't started since, give up.
    case rabbit_file:is_dir(dir()) of
        true  -> exit({rename_in_progress,
                       "Restart node under old name to roll back"});
        false -> ok = rabbit_file:ensure_dir(mnesia_copy_dir())
    end,

    %% Check we don't have two nodes mapped to the same node
    {FromNodes, ToNodes} = lists:unzip(NodeMapList),
    case length(FromNodes) - length(lists:usort(ToNodes)) of
        0 -> ok;
        _ -> exit({duplicate_node, ToNodes})
    end,

    %% Figure out which node we are before and after the change
    FromNode = case [From || {From, To} <- NodeMapList,
                             To =:= Node] of
                   [N] -> N;
                   []  -> Node
               end,
    NodeMap = dict:from_list(NodeMapList),
    ToNode = case dict:find(FromNode, NodeMap) of
                 {ok, N2} -> N2;
                 error    -> FromNode
             end,

    %% Check that we are in the cluster, all old nodes are in the
    %% cluster, and no new nodes are.
    Nodes = rabbit_mnesia:cluster_nodes(all),
    case {FromNodes -- Nodes, ToNodes -- (ToNodes -- Nodes),
          lists:member(Node, Nodes ++ ToNodes)} of
        {[], [], true}  -> ok;
        {[], [], false} -> exit({i_am_not_involved, Node});
        {F,  [], _}     -> exit({nodes_not_in_cluster, F});
        {_,  T,  _}     -> exit({nodes_already_in_cluster, T})
    end,
    {FromNode, ToNode, NodeMap}.
+
%% Checkpoint all locally-replicated tables and write them to the
%% Backup file; Mnesia runs only for the duration of the backup.
take_backup(Backup) ->
    start_mnesia(),
    %% We backup only local tables: in particular, this excludes the
    %% connection tracking tables which have no local replica.
    LocalTables = mnesia:system_info(local_tables),
    {ok, Name, _Nodes} = mnesia:activate_checkpoint([
        {max, LocalTables}
      ]),
    ok = mnesia:backup_checkpoint(Name, Backup),
    stop_mnesia().

%% Install the converted backup as a fallback, cycle Mnesia once so it
%% takes effect, and arrange for tables to be force-loaded on the next
%% boot (our old peers will not be around to confirm table ownership).
restore_backup(Backup) ->
    ok = mnesia:install_fallback(Backup, [{scope, local}]),
    start_mnesia(),
    stop_mnesia(),
    rabbit_mnesia:force_load_next_boot().
+
%% Called at boot: if a pending rename config file exists, complete (or
%% abandon) the rename depending on which name we came up under; a
%% missing/unreadable file means there is nothing to do.
-spec maybe_finish([node()]) -> 'ok'.

maybe_finish(AllNodes) ->
    case rabbit_file:read_term_file(rename_config_name()) of
        {ok, [{FromNode, ToNode}]} -> finish(FromNode, ToNode, AllNodes);
        _                          -> ok
    end.

%% Dispatch on the name we actually booted under:
%%  - ToNode: the rename went through; finish as primary when no peers
%%    are running, otherwise resync as a secondary.
%%  - FromNode: still the old name, so roll back config files and the
%%    Mnesia directory from the copies taken before the rename.
%%  - anything else: a rename config that is not ours - log and ignore
%%    (boot will almost certainly fail anyway).
finish(FromNode, ToNode, AllNodes) ->
    case node() of
        ToNode ->
            case rabbit_upgrade:nodes_running(AllNodes) of
                [] -> finish_primary(FromNode, ToNode);
                _  -> finish_secondary(FromNode, ToNode, AllNodes)
            end;
        FromNode ->
            rabbit_log:info(
              "Abandoning rename from ~s to ~s since we are still ~s~n",
              [FromNode, ToNode, FromNode]),
            [{ok, _} = file:copy(backup_of_conf(F), F) || F <- config_files()],
            ok = rabbit_file:recursive_delete([rabbit_mnesia:dir()]),
            ok = rabbit_file:recursive_copy(
                   mnesia_copy_dir(), rabbit_mnesia:dir()),
            delete_rename_files();
        _ ->
            %% Boot will almost certainly fail but we might as
            %% well just log this
            rabbit_log:info(
              "Rename attempted from ~s to ~s but we are ~s - ignoring.~n",
              [FromNode, ToNode, node()])
    end.
+
%% First renamed node to restart: our mutated database is now the
%% authority, so just clear the pending-rename state.
finish_primary(FromNode, ToNode) ->
    rabbit_log:info("Restarting as primary after rename from ~s to ~s~n",
                    [FromNode, ToNode]),
    delete_rename_files(),
    ok.

%% Peers are already running: wipe/rejoin via the upgrade machinery,
%% then fix up references to our old name in the running cluster.
finish_secondary(FromNode, ToNode, AllNodes) ->
    rabbit_log:info("Restarting as secondary after rename from ~s to ~s~n",
                    [FromNode, ToNode]),
    rabbit_upgrade:secondary_upgrade(AllNodes),
    rename_in_running_mnesia(FromNode, ToNode),
    delete_rename_files(),
    ok.
+
%% All rename state lives in a "-rename" sibling of the Mnesia dir;
%% its existence marks a rename in progress (see prepare/2).
dir()                -> rabbit_mnesia:dir() ++ "-rename".
before_backup_name() -> dir() ++ "/backup-before".
after_backup_name()  -> dir() ++ "/backup-after".
rename_config_name() -> dir() ++ "/pending.config".
mnesia_copy_dir()    -> dir() ++ "/mnesia-copy".

%% Remove all pending-rename state in one go.
delete_rename_files() -> ok = rabbit_file:recursive_delete([dir()]).

%% Start Mnesia, force-load tables (no peers available to confirm
%% ownership) and wait for replicated tables; paired with stop_mnesia/0
%% around backup/restore operations.
start_mnesia() -> rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
                  rabbit_table:force_load(),
                  rabbit_table:wait_for_replicated(_Retry = false).
stop_mnesia() -> stopped = mnesia:stop().
+
%% Copy the backup, rewriting node names and pids in the tables listed
%% in ?CONVERT_TABLES (schema and rabbit_durable_queue); rows of all
%% other tables pass through unchanged. 'switched' makes the traverse
%% read FromBackup and write ToBackup.
convert_backup(NodeMap, FromBackup, ToBackup) ->
    mnesia:traverse_backup(
      FromBackup, ToBackup,
      fun
          (Row, Acc) ->
              case lists:member(element(1, Row), ?CONVERT_TABLES) of
                  true  -> {[update_term(NodeMap, Row)], Acc};
                  false -> {[Row], Acc}
              end
      end, switched).

%% Cluster status files kept by rabbit_node_monitor; they contain node
%% names and therefore also need rewriting during a rename.
config_files() ->
    [rabbit_node_monitor:running_nodes_filename(),
     rabbit_node_monitor:cluster_status_filename()].

%% Where the pre-rename copy of a config file is kept (for rollback).
backup_of_conf(Path) ->
    filename:join([dir(), filename:basename(Path)]).

convert_config_files(NodeMap) ->
    [convert_config_file(NodeMap, Path) || Path <- config_files()].

%% Rewrite one config file in place, keeping a backup copy first so
%% finish/3 can restore it if the rename is abandoned.
convert_config_file(NodeMap, Path) ->
    {ok, Term} = rabbit_file:read_term_file(Path),
    {ok, _} = file:copy(Path, backup_of_conf(Path)),
    ok = rabbit_file:write_term_file(Path, update_term(NodeMap, Term)).
+
%% Map an old node name to its new one; identity for unmapped nodes.
lookup_node(OldNode, NodeMap) ->
    case dict:find(OldNode, NodeMap) of
        {ok, NewNode} -> NewNode;
        error         -> OldNode
    end.

%% Single-entry node map, for the one-pair rename done by
%% rename_in_running_mnesia/2.
mini_map(FromNode, ToNode) -> dict:from_list([{FromNode, ToNode}]).

%% Deep-rewrite an arbitrary term: lists and tuples are walked
%% recursively, atoms are looked up in NodeMap (identity when absent),
%% pids are re-homed onto the mapped node, and everything else passes
%% through unchanged.
update_term(NodeMap, L) when is_list(L) ->
    [update_term(NodeMap, I) || I <- L];
update_term(NodeMap, T) when is_tuple(T) ->
    list_to_tuple(update_term(NodeMap, tuple_to_list(T)));
update_term(NodeMap, Node) when is_atom(Node) ->
    lookup_node(Node, NodeMap);
update_term(NodeMap, Pid) when is_pid(Pid) ->
    rabbit_misc:pid_change_node(Pid, lookup_node(node(Pid), NodeMap));
update_term(_NodeMap, Term) ->
    Term.
+
%% Patch a running cluster after a node was renamed elsewhere: drop the
%% old node's schema copy and rewrite rabbit_durable_queue. Refuses to
%% proceed if the old node is still running or the new node is not a
%% cluster member.
rename_in_running_mnesia(FromNode, ToNode) ->
    All = rabbit_mnesia:cluster_nodes(all),
    Running = rabbit_nodes:all_running(),
    case {lists:member(FromNode, Running), lists:member(ToNode, All)} of
        {false, true} -> ok;
        {true,  _}    -> exit({old_node_running,        FromNode});
        {_,     false} -> exit({new_node_not_in_cluster, ToNode})
    end,
    {atomic, ok} = mnesia:del_table_copy(schema, FromNode),
    Map = mini_map(FromNode, ToNode),
    {atomic, _} = transform_table(rabbit_durable_queue, Map),
    ok.

%% Rewrite every row of Table inside a single sync transaction, holding
%% a table-wide write lock for the duration.
transform_table(Table, Map) ->
    mnesia:sync_transaction(
      fun () ->
              mnesia:lock({table, Table}, write),
              transform_table(Table, Map, mnesia:first(Table))
      end).

%% Key-ordered walk over the table, rewriting each row via update_term/2.
transform_table(_Table, _Map, '$end_of_table') ->
    ok;
transform_table(Table, Map, Key) ->
    [Term] = mnesia:read(Table, Key, write),
    ok = mnesia:write(Table, update_term(Map, Term), write),
    transform_table(Table, Map, mnesia:next(Table, Key)).
+
%% Restart Erlang distribution under BecomeNode's name so subsequent
%% Mnesia operations happen "as" that node. Refuses if a node with that
%% name is already reachable. error_logger output is muted so net
%% kernel restart noise does not pollute the console.
become(BecomeNode) ->
    error_logger:tty(false),
    case net_adm:ping(BecomeNode) of
        pong -> exit({node_running, BecomeNode});
        pang -> ok = net_kernel:stop(),
                io:format("  * Impersonating node: ~s...", [BecomeNode]),
                {ok, _} = start_distribution(BecomeNode),
                io:format(" done~n", []),
                Dir = mnesia:system_info(directory),
                io:format("  * Mnesia directory  : ~s~n", [Dir])
    end.

%% Bring distribution back up under Name, preserving its shortname /
%% longname flavour.
start_distribution(Name) ->
    rabbit_nodes:ensure_epmd(),
    NameType = rabbit_nodes_common:name_type(Name),
    net_kernel:start([Name, NameType]).
diff --git a/deps/rabbit/src/rabbit_msg_file.erl b/deps/rabbit/src/rabbit_msg_file.erl
new file mode 100644
index 0000000000..1a24f690a0
--- /dev/null
+++ b/deps/rabbit/src/rabbit_msg_file.erl
@@ -0,0 +1,114 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_msg_file).
+
+-export([append/3, read/2, scan/4]).
+
+%%----------------------------------------------------------------------------
+
+-include("rabbit_msg_store.hrl").
+
+-define(INTEGER_SIZE_BYTES, 8).
+-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)).
+-define(WRITE_OK_SIZE_BITS, 8).
+-define(WRITE_OK_MARKER, 255).
+-define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)).
+-define(MSG_ID_SIZE_BYTES, 16).
+-define(MSG_ID_SIZE_BITS, (8 * ?MSG_ID_SIZE_BYTES)).
+-define(SCAN_BLOCK_SIZE, 4194304). %% 4MB
+
+%%----------------------------------------------------------------------------
+
+-type io_device() :: any().
+-type position() :: non_neg_integer().
+-type msg_size() :: non_neg_integer().
+-type file_size() :: non_neg_integer().
+-type message_accumulator(A) ::
+ fun (({rabbit_types:msg_id(), msg_size(), position(), binary()}, A) ->
+ A).
+
+%%----------------------------------------------------------------------------
+
-spec append(io_device(), rabbit_types:msg_id(), msg()) ->
          rabbit_types:ok_or_error2(msg_size(), any()).

%% Append one message in the on-disk framing:
%%   <<Size:?INTEGER_SIZE_BITS, MsgId:?MSG_ID_SIZE_BYTES/binary,
%%     Body/binary, ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>
%% where Size covers id + term_to_binary'd body. The trailing marker
%% byte lets scans detect torn writes. Returns {ok, TotalBytesWritten}
%% (Size plus the length-prefix and marker overhead) or the
%% file_handle_cache error as-is.
append(FileHdl, MsgId, MsgBody)
  when is_binary(MsgId) andalso size(MsgId) =:= ?MSG_ID_SIZE_BYTES ->
    MsgBodyBin = term_to_binary(MsgBody),
    MsgBodyBinSize = size(MsgBodyBin),
    Size = MsgBodyBinSize + ?MSG_ID_SIZE_BYTES,
    case file_handle_cache:append(FileHdl,
                                  <<Size:?INTEGER_SIZE_BITS,
                                    MsgId:?MSG_ID_SIZE_BYTES/binary,
                                    MsgBodyBin:MsgBodyBinSize/binary,
                                    ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of
        ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT};
        KO -> KO
    end.
+
-spec read(io_device(), msg_size()) ->
          rabbit_types:ok_or_error2({rabbit_types:msg_id(), msg()},
                                    any()).

%% Read back one message of known total size (as returned by append/3)
%% from the current position. The Size bound before the case must
%% re-match against the frame's own length field, validating the frame.
%% NOTE(review): a successfully read binary that fails the frame
%% pattern (bad length field or marker) falls through to the KO clause
%% and is returned as the raw {ok, Binary} - confirm callers handle
%% that shape.
read(FileHdl, TotalSize) ->
    Size = TotalSize - ?FILE_PACKING_ADJUSTMENT,
    BodyBinSize = Size - ?MSG_ID_SIZE_BYTES,
    case file_handle_cache:read(FileHdl, TotalSize) of
        {ok, <<Size:?INTEGER_SIZE_BITS,
               MsgId:?MSG_ID_SIZE_BYTES/binary,
               MsgBodyBin:BodyBinSize/binary,
               ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} ->
            {ok, {MsgId, binary_to_term(MsgBodyBin)}};
        KO -> KO
    end.
+
-spec scan(io_device(), file_size(), message_accumulator(A), A) ->
          {'ok', A, position()}.

%% Fold Fun over every valid message frame in the file, reading in
%% ?SCAN_BLOCK_SIZE chunks. Returns the accumulator plus the offset
%% just past the last complete frame seen.
scan(FileHdl, FileSize, Fun, Acc) when FileSize >= 0 ->
    scan(FileHdl, FileSize, <<>>, 0, 0, Fun, Acc).

%% Done once ReadOffset has caught up with FileSize (non-variable
%% repetition of FileSize in the head makes that an equality match).
scan(_FileHdl, FileSize, _Data, FileSize, ScanOffset, _Fun, Acc) ->
    {ok, Acc, ScanOffset};
scan(FileHdl, FileSize, Data, ReadOffset, ScanOffset, Fun, Acc) ->
    Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]),
    case file_handle_cache:read(FileHdl, Read) of
        {ok, Data1} ->
            %% Any partial frame left from the previous block is
            %% prepended so frames can straddle block boundaries.
            {Data2, Acc1, ScanOffset1} =
                scanner(<<Data/binary, Data1/binary>>, ScanOffset, Fun, Acc),
            ReadOffset1 = ReadOffset + size(Data1),
            scan(FileHdl, FileSize, Data2, ReadOffset1, ScanOffset1, Fun, Acc1);
        _KO ->
            %% Read failure: stop and report what we have so far.
            {ok, Acc, ScanOffset}
    end.
+
%% Incremental frame parser over one chunk of file data. Returns
%% {UnparsedTail, AccOut, OffsetPastLastCompleteFrame}.
scanner(<<>>, Offset, _Fun, Acc) ->
    {<<>>, Acc, Offset};
%% A zero length field - presumably zero-filled space at the end of the
%% file - means there are no further frames; discard the rest.
scanner(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Offset, _Fun, Acc) ->
    {<<>>, Acc, Offset}; %% Nothing to do other than stop.
scanner(<<Size:?INTEGER_SIZE_BITS, MsgIdAndMsg:Size/binary,
          WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Offset, Fun, Acc) ->
    TotalSize = Size + ?FILE_PACKING_ADJUSTMENT,
    case WriteMarker of
        ?WRITE_OK_MARKER ->
            %% Here we take option 5 from
            %% https://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in
            %% which we read the MsgId as a number, and then convert it
            %% back to a binary in order to work around bugs in
            %% Erlang's GC.
            <<MsgIdNum:?MSG_ID_SIZE_BITS, Msg/binary>> =
                <<MsgIdAndMsg:Size/binary>>,
            <<MsgId:?MSG_ID_SIZE_BYTES/binary>> =
                <<MsgIdNum:?MSG_ID_SIZE_BITS>>,
            scanner(Rest, Offset + TotalSize, Fun,
                    Fun({MsgId, TotalSize, Offset, Msg}, Acc));
        _ ->
            %% Missing OK marker: torn write - skip this frame.
            scanner(Rest, Offset + TotalSize, Fun, Acc)
    end;
%% Not enough bytes for a complete frame; hand the remainder back so
%% the caller can prepend it to the next block read.
scanner(Data, Offset, _Fun, Acc) ->
    {Data, Acc, Offset}.
diff --git a/deps/rabbit/src/rabbit_msg_record.erl b/deps/rabbit/src/rabbit_msg_record.erl
new file mode 100644
index 0000000000..3ebe14cb9f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_msg_record.erl
@@ -0,0 +1,400 @@
+-module(rabbit_msg_record).
+
+-export([
+ init/1,
+ to_iodata/1,
+ from_amqp091/2,
+ to_amqp091/1,
+ add_message_annotations/2,
+ message_annotation/2,
+ message_annotation/3
+ ]).
+
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+-include_lib("amqp10_common/include/amqp10_framing.hrl").
+
+-type maybe(T) :: T | undefined.
+-type amqp10_data() :: #'v1_0.data'{} |
+ [#'v1_0.amqp_sequence'{} | #'v1_0.data'{}] |
+ #'v1_0.amqp_value'{}.
+-record(msg,
+ {
+ % header :: maybe(#'v1_0.header'{}),
+ % delivery_annotations :: maybe(#'v1_0.delivery_annotations'{}),
+ message_annotations :: maybe(#'v1_0.message_annotations'{}),
+ properties :: maybe(#'v1_0.properties'{}),
+ application_properties :: maybe(#'v1_0.application_properties'{}),
+ data :: maybe(amqp10_data())
+ % footer :: maybe(#'v1_0.footer'{})
+ }).
+
+%% holds static or rarely changing fields
+-record(cfg, {}).
+-record(?MODULE, {cfg :: #cfg{},
+ msg :: #msg{},
+ %% holds a list of modifications to various sections
+ changes = [] :: list()}).
+
+-opaque state() :: #?MODULE{}.
+
+-export_type([
+ state/0
+ ]).
+
+%% this module acts as a wrapper / converter for the internal binar storage format
+%% (AMQP 1.0) and any format it needs to be converted to / from.
+%% Efficiency is key. No unnecessary allocations or work should be done until it
+%% is absolutely needed
+
%% init from an AMQP 1.0 encoded binary: decode the section list and
%% stash the (at most one of each) recognised sections in #msg{}.
-spec init(binary()) -> state().
init(Bin) when is_binary(Bin) ->
    %% TODO: delay parsing until needed
    {MA, P, AP, D} = decode(amqp10_framing:decode_bin(Bin),
                            {undefined, undefined, undefined, undefined}),
    #?MODULE{cfg = #cfg{},
             msg = #msg{properties = P,
                        application_properties = AP,
                        message_annotations = MA,
                        data = D}}.

%% Fold the decoded section list into {MA, P, AP, D}, keeping the last
%% section of each kind. There is deliberately no catch-all clause:
%% any other section type (header, footer, ...) crashes with
%% function_clause rather than being silently dropped.
decode([], Acc) ->
    Acc;
decode([#'v1_0.message_annotations'{} = MA | Rem], {_, P, AP, D}) ->
    decode(Rem, {MA, P, AP, D});
decode([#'v1_0.properties'{} = P | Rem], {MA, _, AP, D}) ->
    decode(Rem, {MA, P, AP, D});
decode([#'v1_0.application_properties'{} = AP | Rem], {MA, P, _, D}) ->
    decode(Rem, {MA, P, AP, D});
decode([#'v1_0.data'{} = D | Rem], {MA, P, AP, _}) ->
    decode(Rem, {MA, P, AP, D}).
+
%% True when every field of a 'v1_0.properties' record we populate is
%% undefined, so to_iodata/1 can skip encoding the section entirely.
amqp10_properties_empty(#'v1_0.properties'{message_id = undefined,
                                           user_id = undefined,
                                           to = undefined,
                                           % subject = wrap(utf8, RKey),
                                           reply_to = undefined,
                                           correlation_id = undefined,
                                           content_type = undefined,
                                           content_encoding = undefined,
                                           creation_time = undefined}) ->
    true;
amqp10_properties_empty(_) ->
    false.
+
%% to realise the final binary data representation
%% Encodes the stored sections back to AMQP 1.0 wire form; sections
%% with empty content (or all-undefined properties) are emitted as <<>>
%% so they take no space.
%% NOTE(review): an 'undefined' message_annotations/application_
%% properties value falls into the encode branch here - looks like
%% callers always supply records; confirm for init/1-sourced states.
-spec to_iodata(state()) -> iodata().
to_iodata(#?MODULE{msg = #msg{properties = P,
                              application_properties = AP,
                              message_annotations = MA,
                              data = Data}}) ->
    [
     case MA of
         #'v1_0.message_annotations'{content = []} ->
             <<>>;
         _ ->
             amqp10_framing:encode_bin(MA)
     end,
     case amqp10_properties_empty(P) of
         true -> <<>>;
         false ->
             amqp10_framing:encode_bin(P)
     end,
     case AP of
         #'v1_0.application_properties'{content = []} ->
             <<>>;
         _ ->
             amqp10_framing:encode_bin(AP)
     end,
     amqp10_framing:encode_bin(Data)
    ].
+
%% TODO: refine type spec here
-spec add_message_annotations(#{binary() => {atom(), term()}}, state()) ->
    state().
%% Merge Anns (Key => {Type, Value}) into the message annotations
%% section, creating the section if absent. Keys are wrapped as
%% symbols. Note map_add/5 prepends without deduplicating, so an
%% existing entry with the same key is shadowed, not replaced.
add_message_annotations(Anns,
                        #?MODULE{msg =
                                 #msg{message_annotations = MA0} = Msg} = State) ->
    Content = maps:fold(
                fun (K, {T, V}, Acc) ->
                        map_add(symbol, K, T, V, Acc)
                end,
                case MA0 of
                    undefined -> [];
                    #'v1_0.message_annotations'{content = C} -> C
                end,
                Anns),

    State#?MODULE{msg =
                  Msg#msg{message_annotations =
                          #'v1_0.message_annotations'{content = Content}}}.
+
%% TODO: refine
-type amqp10_term() :: {atom(), term()}.

-spec message_annotation(binary(), state()) -> undefined | amqp10_term().
%% Look up a message annotation by its binary symbol key; 'undefined'
%% when the annotation (or the whole section) is absent.
message_annotation(Key, State) ->
    message_annotation(Key, State, undefined).

-spec message_annotation(binary(), state(), undefined | amqp10_term()) ->
    undefined | amqp10_term().
%% As message_annotation/2 but with an explicit default, returned both
%% when there is no annotations section and when the key is missing.
message_annotation(_Key, #?MODULE{msg = #msg{message_annotations = undefined}},
                   Default) ->
    Default;
message_annotation(Key,
                   #?MODULE{msg =
                            #msg{message_annotations =
                                 #'v1_0.message_annotations'{content = Content}}},
                   Default)
  when is_binary(Key) ->
    %% Annotation keys are stored as {symbol, Bin}; compare on the bare
    %% binary.
    case lists:search(fun ({{symbol, K}, _}) -> K == Key end, Content) of
        {value, {_K, V}} ->
            V;
        false ->
            Default
    end.
+
+
%% Build a state from AMQP 0-9-1 basic properties plus payload.
%% Fields with a direct AMQP 1.0 counterpart go into 'v1_0.properties';
%% 0-9-1 headers become application properties; properties with no 1.0
%% equivalent are preserved as application properties ("x-basic-type",
%% "x-basic-app-id") or message annotations ("x-basic-priority",
%% "x-basic-delivery-mode", "x-basic-expiration").
-spec from_amqp091(#'P_basic'{}, iodata()) -> state().
from_amqp091(#'P_basic'{message_id = MsgId,
                        expiration = Expiration,
                        delivery_mode = DelMode,
                        headers = Headers,
                        user_id = UserId,
                        reply_to = ReplyTo,
                        type = Type,
                        priority = Priority,
                        app_id = AppId,
                        correlation_id = CorrId,
                        content_type = ContentType,
                        content_encoding = ContentEncoding,
                        timestamp = Timestamp
                       }, Data) ->
    %% TODO: support parsing properties bin directly?
    %% 0-9-1 timestamps are scaled by 1000 here (and divided back in
    %% to_amqp091/1) - seconds vs milliseconds granularity.
    ConvertedTs = case Timestamp of
                      undefined ->
                          undefined;
                      _ ->
                          Timestamp * 1000
                  end,
    P = #'v1_0.properties'{message_id = wrap(utf8, MsgId),
                           user_id = wrap(binary, UserId),
                           to = undefined,
                           % subject = wrap(utf8, RKey),
                           reply_to = wrap(utf8, ReplyTo),
                           correlation_id = wrap(utf8, CorrId),
                           content_type = wrap(symbol, ContentType),
                           content_encoding = wrap(symbol, ContentEncoding),
                           creation_time = wrap(timestamp, ConvertedTs)},

    APC0 = [{wrap(utf8, K), from_091(T, V)} || {K, T, V}
                                               <- case Headers of
                                                      undefined -> [];
                                                      _ -> Headers
                                                  end],
    %% properties that do not map directly to AMQP 1.0 properties are stored
    %% in application properties
    APC = map_add(utf8, <<"x-basic-type">>, utf8, Type,
                  map_add(utf8, <<"x-basic-app-id">>, utf8, AppId, APC0)),

    MAC = map_add(symbol, <<"x-basic-priority">>, ubyte, Priority,
                  map_add(symbol, <<"x-basic-delivery-mode">>, ubyte, DelMode,
                          map_add(symbol, <<"x-basic-expiration">>, utf8, Expiration, []))),

    AP = #'v1_0.application_properties'{content = APC},
    MA = #'v1_0.message_annotations'{content = MAC},
    #?MODULE{cfg = #cfg{},
             msg = #msg{properties = P,
                        application_properties = AP,
                        message_annotations = MA,
                        data = #'v1_0.data'{content = Data}}}.
+
%% Prepend {Key, Value} (both wrapped with their type tags) onto an
%% AMQP 1.0 key/value list; undefined values are skipped so absent
%% fields produce no entry.
map_add(_T, _Key, _Type, undefined, Acc) ->
    Acc;
map_add(KeyType, Key, Type, Value, Acc) ->
    [{wrap(KeyType, Key), wrap(Type, Value)} | Acc].
+
-spec to_amqp091(state()) -> {#'P_basic'{}, iodata()}.
%% Inverse of from_amqp091/2: rebuild 0-9-1 basic properties and the
%% payload from the stored AMQP 1.0 sections. The "x-basic-*" entries
%% written by from_amqp091/2 are popped back out of application
%% properties / message annotations; whatever remains of the
%% application properties becomes the 0-9-1 headers.
to_amqp091(#?MODULE{msg = #msg{properties = P,
                               application_properties = APR,
                               message_annotations = MAR,
                               data = #'v1_0.data'{content = Payload}}}) ->
    #'v1_0.properties'{message_id = MsgId,
                       user_id = UserId,
                       reply_to = ReplyTo0,
                       correlation_id = CorrId,
                       content_type = ContentType,
                       content_encoding = ContentEncoding,
                       creation_time = Timestamp} = case P of
                                                        undefined ->
                                                            #'v1_0.properties'{};
                                                        _ ->
                                                            P
                                                    end,

    AP0 = case APR of
              #'v1_0.application_properties'{content = AC} -> AC;
              _ -> []
          end,
    MA0 = case MAR of
              #'v1_0.message_annotations'{content = MC} -> MC;
              _ -> []
          end,

    %% Each amqp10_map_get/2 both extracts the value and removes the
    %% entry, threading the shrinking list through.
    {Type, AP1} = amqp10_map_get(utf8(<<"x-basic-type">>), AP0),
    {AppId, AP} = amqp10_map_get(utf8(<<"x-basic-app-id">>), AP1),

    {Priority, MA1} = amqp10_map_get(symbol(<<"x-basic-priority">>), MA0),
    {DelMode, MA2} = amqp10_map_get(symbol(<<"x-basic-delivery-mode">>), MA1),
    {Expiration, _MA} = amqp10_map_get(symbol(<<"x-basic-expiration">>), MA2),

    Headers0 = [to_091(unwrap(K), V) || {K, V} <- AP],
    %% Non-string 1.0 message/correlation ids are stringified with a
    %% companion "...-type" header recording the original type.
    {Headers1, MsgId091} = message_id(MsgId, <<"x-message-id-type">>, Headers0),
    {Headers, CorrId091} = message_id(CorrId, <<"x-correlation-id-type">>, Headers1),

    BP = #'P_basic'{message_id = MsgId091,
                    delivery_mode = DelMode,
                    expiration = Expiration,
                    user_id = unwrap(UserId),
                    headers = case Headers of
                                  [] -> undefined;
                                  _ -> Headers
                              end,
                    reply_to = unwrap(ReplyTo0),
                    type = Type,
                    app_id = AppId,
                    priority = Priority,
                    correlation_id = CorrId091,
                    content_type = unwrap(ContentType),
                    content_encoding = unwrap(ContentEncoding),
                    %% milliseconds back down to 0-9-1's seconds
                    timestamp = case unwrap(Timestamp) of
                                    undefined ->
                                        undefined;
                                    Ts ->
                                        Ts div 1000
                                end
                   },
    {BP, Payload}.
+
+%%% Internal
+
%% Fetch and remove key K from an AMQP 1.0 key/value list. Returns
%% {UnwrappedValue, RemainingList}; the value comes back stripped of
%% its type tag, or 'undefined' (with the list untouched) when K is
%% absent.
amqp10_map_get(K, KVList) ->
    case lists:keytake(K, 1, KVList) of
        {value, {_, TaggedVal}, Rest} -> {unwrap(TaggedVal), Rest};
        false                         -> {undefined, KVList}
    end.

%% Attach an AMQP 1.0 type tag to Val; 'undefined' stays untagged so
%% absent fields remain absent.
wrap(Type, Val) ->
    case Val of
        undefined -> undefined;
        _         -> {Type, Val}
    end.

%% Strip the type tag attached by wrap/2; 'undefined' passes through.
unwrap(Tagged) ->
    case Tagged of
        undefined  -> undefined;
        {_Type, V} -> V
    end.
+
+% symbol_for(#'v1_0.properties'{}) ->
+% {symbol, <<"amqp:properties:list">>};
+
+% number_for(#'v1_0.properties'{}) ->
+% {ulong, 115};
+% encode(Frame = #'v1_0.properties'{}) ->
+% amqp10_framing:encode_described(list, 115, Frame);
+
+% encode_described(list, CodeNumber, Frame) ->
+% {described, {ulong, CodeNumber},
+% {list, lists:map(fun encode/1, tl(tuple_to_list(Frame)))}};
+
+% -spec generate(amqp10_type()) -> iolist().
+% generate({described, Descriptor, Value}) ->
+% DescBin = generate(Descriptor),
+% ValueBin = generate(Value),
+% [ ?DESCRIBED_BIN, DescBin, ValueBin ].
+
%% Map an AMQP 1.0 typed value back onto an AMQP 0-9-1 field-table
%% triple {Name, Type, Value}. Timestamps are scaled from milliseconds
%% back down to seconds; both bare booleans and the tagged {boolean, _}
%% form are accepted.
%% NB: header values can never be shortstr!
to_091(K, {utf8, Bin}) when is_binary(Bin) -> {K, longstr, Bin};
to_091(K, {binary, Bin})       -> {K, binary, Bin};
to_091(K, {timestamp, Millis}) -> {K, timestamp, Millis div 1000};
to_091(K, {boolean, Bool})     -> {K, bool, Bool};
to_091(K, true)                -> {K, bool, true};
to_091(K, false)               -> {K, bool, false};
to_091(K, {byte, Num})         -> {K, byte, Num};
to_091(K, {ubyte, Num})        -> {K, unsignedbyte, Num};
to_091(K, {short, Num})        -> {K, short, Num};
to_091(K, {ushort, Num})       -> {K, unsignedshort, Num};
to_091(K, {int, Num})          -> {K, signedint, Num};
to_091(K, {uint, Num})         -> {K, unsignedint, Num};
to_091(K, {long, Num})         -> {K, long, Num};
to_091(K, {float, Num})        -> {K, float, Num};
to_091(K, {double, Num})       -> {K, double, Num}.
+
%% Map an AMQP 0-9-1 field-table value onto its AMQP 1.0 type tag.
%% Inverse of to_091/2; timestamps are converted from seconds to
%% milliseconds on the way in.
from_091(longstr, S) when is_binary(S) -> {utf8, S};
from_091(binary, B)         -> {binary, B};
from_091(bool, B)           -> {boolean, B};
from_091(timestamp, Secs)   -> {timestamp, Secs * 1000};
from_091(byte, Num)         -> {byte, Num};
from_091(unsignedbyte, Num) -> {ubyte, Num};
from_091(short, Num)        -> {short, Num};
from_091(unsignedshort, Num) -> {ushort, Num};
from_091(signedint, Num)    -> {int, Num};
from_091(unsignedint, Num)  -> {uint, Num};
from_091(long, Num)         -> {long, Num};
from_091(float, Num)        -> {float, Num};
from_091(double, Num)       -> {double, Num}.
+
+% convert_header(signedint, V) -> [$I, <<V:32/signed>>];
+% convert_header(decimal, V) -> {Before, After} = V,
+% [$D, Before, <<After:32>>];
+% convert_header(timestamp, V) -> [$T, <<V:64>>];
+% % convert_header(table, V) -> [$F | table_to_binary(V)];
+% % convert_header(array, V) -> [$A | array_to_binary(V)];
+% convert_header(byte, V) -> [$b, <<V:8/signed>>];
+% convert_header(double, V) -> [$d, <<V:64/float>>];
+% convert_header(float, V) -> [$f, <<V:32/float>>];
+% convert_header(short, V) -> [$s, <<V:16/signed>>];
+% convert_header(binary, V) -> [$x | long_string_to_binary(V)];
+% convert_header(unsignedbyte, V) -> [$B, <<V:8/unsigned>>];
+% convert_header(unsignedshort, V) -> [$u, <<V:16/unsigned>>];
+% convert_header(unsignedint, V) -> [$i, <<V:32/unsigned>>];
+% convert_header(void, _V) -> [$V].
+
%% Tag helpers for AMQP 1.0 map keys.
utf8(T) -> {utf8, T}.
symbol(T) -> {symbol, T}.

%% Convert an AMQP 1.0 message-id (or correlation-id) into a 0-9-1
%% string, recording the original type in an "<id>-type" header so the
%% round trip is lossless. Returns {Headers, Id091}.
message_id({uuid, UUID}, HKey, H0) ->
    H = [{HKey, longstr, <<"uuid">>} | H0],
    {H, rabbit_data_coercion:to_binary(rabbit_guid:to_string(UUID))};
message_id({ulong, N}, HKey, H0) ->
    H = [{HKey, longstr, <<"ulong">>} | H0],
    {H, erlang:integer_to_binary(N)};
message_id({binary, B}, HKey, H0) ->
    E = base64:encode(B),
    case byte_size(E) > 256 of
        true ->
            %% too long for a 0-9-1 shortstr id: move the raw value
            %% into a header named without the "-type" suffix and leave
            %% the 0-9-1 id unset
            K = binary:replace(HKey, <<"-type">>, <<>>),
            {[{K, longstr, B} | H0], undefined};
        false ->
            H = [{HKey, longstr, <<"binary">>} | H0],
            {H, E}
    end;
message_id({utf8, S}, HKey, H0) ->
    case byte_size(S) > 256 of
        true ->
            K = binary:replace(HKey, <<"-type">>, <<>>),
            {[{K, longstr, S} | H0], undefined};
        false ->
            %% plain string id fits as-is; no type header needed
            {H0, S}
    end;
%% undefined (or any other tagged value) passes through unwrapped with
%% no extra header.
message_id(MsgId, _, H) ->
    {H, unwrap(MsgId)}.
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
diff --git a/deps/rabbit/src/rabbit_msg_store.erl b/deps/rabbit/src/rabbit_msg_store.erl
new file mode 100644
index 0000000000..4851e56248
--- /dev/null
+++ b/deps/rabbit/src/rabbit_msg_store.erl
@@ -0,0 +1,2245 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_msg_store).
+
+-behaviour(gen_server2).
+
+-export([start_link/4, start_global_store_link/4, successfully_recovered_state/1,
+ client_init/4, client_terminate/1, client_delete_and_terminate/1,
+ client_ref/1, close_all_indicated/1,
+ write/3, write_flow/3, read/2, contains/2, remove/2]).
+
+-export([set_maximum_since_use/2, combine_files/3,
+ delete_file/2]). %% internal
+
+-export([scan_file_for_valid_messages/1]). %% salvage tool
+
+-export([transform_dir/3, force_recovery/2]). %% upgrade
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3, prioritise_call/4, prioritise_cast/3,
+ prioritise_info/3, format_message_queue/2]).
+
+%%----------------------------------------------------------------------------
+
+-include("rabbit_msg_store.hrl").
+
+-define(SYNC_INTERVAL, 25). %% milliseconds
+-define(CLEAN_FILENAME, "clean.dot").
+-define(FILE_SUMMARY_FILENAME, "file_summary.ets").
+-define(TRANSFORM_TMP, "transform_tmp").
+
+-define(BINARY_MODE, [raw, binary]).
+-define(READ_MODE, [read]).
+-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]).
+-define(WRITE_MODE, [write]).
+
+-define(FILE_EXTENSION, ".rdq").
+-define(FILE_EXTENSION_TMP, ".rdt").
+
+-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB
+
+ %% i.e. two pairs, so GC does not go idle when busy
+-define(MAXIMUM_SIMULTANEOUS_GC_FILES, 4).
+
+%%----------------------------------------------------------------------------
+
+-record(msstate,
+ {
+ %% store directory
+ dir,
+ %% the module for index ops,
+ %% rabbit_msg_store_ets_index by default
+ index_module,
+ %% where are messages?
+ index_state,
+ %% current file name as number
+ current_file,
+ %% current file handle since the last fsync?
+ current_file_handle,
+ %% file handle cache
+ file_handle_cache,
+ %% TRef for our interval timer
+ sync_timer_ref,
+ %% sum of valid data in all files
+ sum_valid_data,
+ %% sum of file sizes
+ sum_file_size,
+ %% things to do once GC completes
+ pending_gc_completion,
+ %% pid of our GC
+ gc_pid,
+ %% tid of the shared file handles table
+ file_handles_ets,
+ %% tid of the file summary table
+ file_summary_ets,
+ %% tid of current file cache table
+ cur_file_cache_ets,
+ %% tid of writes/removes in flight
+ flying_ets,
+ %% set of dying clients
+ dying_clients,
+ %% map of references of all registered clients
+ %% to callbacks
+ clients,
+ %% boolean: did we recover state?
+ successfully_recovered,
+ %% how big are our files allowed to get?
+ file_size_limit,
+ %% client ref to synced messages mapping
+ cref_to_msg_ids,
+ %% See CREDIT_DISC_BOUND in rabbit.hrl
+ credit_disc_bound
+ }).
+
+%% Per-client state built by client_init/4.  It mirrors the server's
+%% shared ETS tables and index state so clients can read and update
+%% in-flight bookkeeping without calling the server.
+-record(client_msstate,
+        { server,             %% store server (pid or registered name)
+          client_ref,         %% this client's unique binary reference
+          file_handle_cache,  %% file handle cache (map), see close_all_handles
+          index_state,        %% state threaded through index_module calls
+          index_module,       %% module implementing the message index
+          dir,                %% store directory
+          gc_pid,             %% pid of the store's GC process
+          file_handles_ets,   %% shared table of open file handles
+          file_summary_ets,   %% shared per-file #file_summary{} table
+          cur_file_cache_ets, %% write-back cache for the current file
+          flying_ets,         %% in-flight write/remove integration table
+          credit_disc_bound   %% see CREDIT_DISC_BOUND in rabbit.hrl
+        }).
+
+%% One row per store file in file_summary_ets: amount of live data,
+%% left/right neighbours, total size, GC lock flag and reader count.
+-record(file_summary,
+        {file, valid_total_size, left, right, file_size, locked, readers}).
+
+%% State handed to the rabbit_msg_store_gc process at start_link time
+%% so it can operate on the store's tables and index directly.
+-record(gc_state,
+        { dir,
+          index_module,
+          index_state,
+          file_summary_ets,
+          file_handles_ets,
+          msg_store
+        }).
+
+%% Entry in dying_clients, recorded when a client announces it is
+%% dying (client_delete_and_terminate/1).
+-record(dying_client,
+        { client_ref,
+          file,
+          offset
+        }).
+
+%%----------------------------------------------------------------------------
+
+-export_type([gc_state/0, file_num/0]).
+
+-type gc_state() :: #gc_state { dir :: file:filename(),
+ index_module :: atom(),
+ index_state :: any(),
+ file_summary_ets :: ets:tid(),
+ file_handles_ets :: ets:tid(),
+ msg_store :: server()
+ }.
+
+-type server() :: pid() | atom().
+-type client_ref() :: binary().
+-type file_num() :: non_neg_integer().
+-type client_msstate() :: #client_msstate {
+ server :: server(),
+ client_ref :: client_ref(),
+ file_handle_cache :: map(),
+ index_state :: any(),
+ index_module :: atom(),
+ dir :: file:filename(),
+ gc_pid :: pid(),
+ file_handles_ets :: ets:tid(),
+ file_summary_ets :: ets:tid(),
+ cur_file_cache_ets :: ets:tid(),
+ flying_ets :: ets:tid(),
+ credit_disc_bound :: {pos_integer(), pos_integer()}}.
+-type msg_ref_delta_gen(A) ::
+ fun ((A) -> 'finished' |
+ {rabbit_types:msg_id(), non_neg_integer(), A}).
+-type maybe_msg_id_fun() ::
+ 'undefined' | fun ((gb_sets:set(), 'written' | 'ignored') -> any()).
+-type maybe_close_fds_fun() :: 'undefined' | fun (() -> 'ok').
+-type deletion_thunk() :: fun (() -> boolean()).
+
+%%----------------------------------------------------------------------------
+
+%% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION
+%% It is not recommended to set this to < 0.5
+-define(GARBAGE_FRACTION, 0.5).
+
+%% Message store is responsible for storing messages
+%% on disk and loading them back. The store handles both
+%% persistent messages and transient ones (when a node
+%% is under RAM pressure and needs to page messages out
+%% to disk). The store is responsible for locating messages
+%% on disk and maintaining an index.
+%%
+%% There are two message stores per node: one for transient
+%% and one for persistent messages.
+%%
+%% Queue processes interact with the stores via clients.
+%%
+%% The components:
+%%
+%% Index: this is a mapping from MsgId to #msg_location{}.
+%% By default, it's in ETS, but other implementations can
+%% be used.
+%% FileSummary: this maps File to #file_summary{} and is stored
+%% in ETS.
+%%
+%% The basic idea is that messages are appended to the current file up
+%% until that file becomes too big (> file_size_limit). At that point,
+%% the file is closed and a new file is created on the _right_ of the
+%% old file which is used for new messages. Files are named
+%% numerically ascending, thus the file with the lowest name is the
+%% eldest file.
+%%
+%% We need to keep track of which messages are in which files (this is
+%% the index); how much useful data is in each file and which files
+%% are on the left and right of each other. This is the purpose of the
+%% file summary ETS table.
+%%
+%% As messages are removed from files, holes appear in these
+%% files. The field ValidTotalSize contains the total amount of useful
+%% data left in the file. This is needed for garbage collection.
+%%
+%% When we discover that a file is now empty, we delete it. When we
+%% discover that it can be combined with the useful data in either its
+%% left or right neighbour, and overall, across all the files, we have
+%% ((the amount of garbage) / (the sum of all file sizes)) >
+%% ?GARBAGE_FRACTION, we start a garbage collection run concurrently,
+%% which will compact the two files together. This keeps disk
+%% utilisation high and aids performance. We deliberately do this
+%% lazily in order to prevent doing GC on files which are soon to be
+%% emptied (and hence deleted).
+%%
+%% Given the compaction between two files, the left file (i.e. elder
+%% file) is considered the ultimate destination for the good data in
+%% the right file. If necessary, the good data in the left file which
+%% is fragmented throughout the file is written out to a temporary
+%% file, then read back in to form a contiguous chunk of good data at
+%% the start of the left file. Thus the left file is garbage collected
+%% and compacted. Then the good data from the right file is copied
+%% onto the end of the left file. Index and file summary tables are
+%% updated.
+%%
+%% On non-clean startup, we scan the files we discover, dealing with
+%% the possibilities of a crash having occurred during a compaction
+%% (this consists of tidyup - the compaction is deliberately designed
+%% such that data is duplicated on disk rather than risking it being
+%% lost), and rebuild the file summary and index ETS table.
+%%
+%% So, with this design, messages move to the left. Eventually, they
+%% should end up in a contiguous block on the left and are then never
+%% rewritten. But this isn't quite the case. If in a file there is one
+%% message that is being ignored, for some reason, and messages in the
+%% file to the right and in the current block are being read all the
+%% time then it will repeatedly be the case that the good data from
+%% both files can be combined and will be written out to a new
+%% file. Whenever this happens, our shunned message will be rewritten.
+%%
+%% So, provided that we combine messages in the right order,
+%% (i.e. left file, bottom to top, right file, bottom to top),
+%% eventually our shunned message will end up at the bottom of the
+%% left file. The compaction/combining algorithm is smart enough to
+%% read in good data from the left file that is scattered throughout
+%% (i.e. C and D in the below diagram), then truncate the file to just
+%% above B (i.e. truncate to the limit of the good contiguous region
+%% at the start of the file), then write C and D on top and then write
+%% E, F and G from the right file on top. Thus contiguous blocks of
+%% good data at the bottom of files are not rewritten.
+%%
+%% +-------+ +-------+ +-------+
+%% | X | | G | | G |
+%% +-------+ +-------+ +-------+
+%% | D | | X | | F |
+%% +-------+ +-------+ +-------+
+%% | X | | X | | E |
+%% +-------+ +-------+ +-------+
+%% | C | | F | ===> | D |
+%% +-------+ +-------+ +-------+
+%% | X | | X | | C |
+%% +-------+ +-------+ +-------+
+%% | B | | X | | B |
+%% +-------+ +-------+ +-------+
+%% | A | | E | | A |
+%% +-------+ +-------+ +-------+
+%% left right left
+%%
+%% From this reasoning, we do have a bound on the number of times the
+%% message is rewritten. From when it is inserted, there can be no
+%% files inserted between it and the head of the queue, and the worst
+%% case is that every time it is rewritten, it moves one position lower
+%% in the file (for it to stay at the same position requires that
+%% there are no holes beneath it, which means truncate would be used
+%% and so it would not be rewritten at all). Thus this seems to
+%% suggest the limit is the number of messages ahead of it in the
+%% queue, though it's likely that that's pessimistic, given the
+%% requirements for compaction/combination of files.
+%%
+%% The other property that we have is the bound on the lowest
+%% utilisation, which should be 50% - worst case is that all files are
+%% fractionally over half full and can't be combined (equivalent is
+%% alternating full files and files with only one tiny message in
+%% them).
+%%
+%% Messages are reference-counted. When a message with the same msg id
+%% is written several times we only store it once, and only remove it
+%% from the store when it has been removed the same number of times.
+%%
+%% The reference counts do not persist. Therefore the initialisation
+%% function must be provided with a generator that produces ref count
+%% deltas for all recovered messages. This is only used on startup
+%% when the shutdown was non-clean.
+%%
+%% Read messages with a reference count greater than one are entered
+%% into a message cache. The purpose of the cache is not especially
+%% performance, though it can help there too, but prevention of memory
+%% explosion. It ensures that as messages with a high reference count
+%% are read from several processes they are read back as the same
+%% binary object rather than multiples of identical binary
+%% objects.
+%%
+%% Reads can be performed directly by clients without calling to the
+%% server. This is safe because multiple file handles can be used to
+%% read files. However, locking is used by the concurrent GC to make
+%% sure that reads are not attempted from files which are in the
+%% process of being garbage collected.
+%%
+%% When a message is removed, its reference count is decremented. Even
+%% if the reference count becomes 0, its entry is not removed. This is
+%% because in the event of the same message being sent to several
+%% different queues, there is the possibility of one queue writing and
+%% removing the message before other queues write it at all. Thus
+%% accommodating 0-reference counts allows us to avoid unnecessary
+%% writes here. Of course, there are complications: the file to which
+%% the message has already been written could be locked pending
+%% deletion or GC, which means we have to rewrite the message as the
+%% original copy will now be lost.
+%%
+%% The server automatically defers reads, removes and contains calls
+%% that occur which refer to files which are currently being
+%% GC'd. Contains calls are only deferred in order to ensure they do
+%% not overtake removes.
+%%
+%% The current file to which messages are being written has a
+%% write-back cache. This is written to immediately by clients and can
+%% be read from by clients too. This means that there are only ever
+%% writes made to the current file, thus eliminating delays due to
+%% flushing write buffers in order to be able to safely read from the
+%% current file. The one exception to this is that on start up, the
+%% cache is not populated with msgs found in the current file, and
+%% thus in this case only, reads may have to come from the file
+%% itself. The effect of this is that even if the msg_store process is
+%% heavily overloaded, clients can still write and read messages with
+%% very low latency and not block at all.
+%%
+%% Clients of the msg_store are required to register before using the
+%% msg_store. This provides them with the necessary client-side state
+%% to allow them to directly access the various caches and files. When
+%% they terminate, they should deregister. They can do this by calling
+%% either client_terminate/1 or client_delete_and_terminate/1. The
+%% differences are: (a) client_terminate is synchronous. As a result,
+%% if the msg_store is badly overloaded and has lots of in-flight
+%% writes and removes to process, this will take some time to
+%% return. However, once it does return, you can be sure that all the
+%% actions you've issued to the msg_store have been processed. (b) Not
+%% only is client_delete_and_terminate/1 asynchronous, but it also
+%% permits writes and subsequent removes from the current
+%% (terminating) client which are still in flight to be safely
+%% ignored. Thus from the point of view of the msg_store itself, and
+%% all from the same client:
+%%
+%% (T) = termination; (WN) = write of msg N; (RN) = remove of msg N
+%% --> W1, W2, W1, R1, T, W3, R2, W2, R1, R2, R3, W4 -->
+%%
+%% The client obviously sent T after all the other messages (up to
+%% W4), but because the msg_store prioritises messages, the T can be
+%% promoted and thus received early.
+%%
+%% Thus at the point of the msg_store receiving T, we have messages 1
+%% and 2 with a refcount of 1. After T, W3 will be ignored because
+%% it's an unknown message, as will R3, and W4. W2, R1 and R2 won't be
+%% ignored because the messages that they refer to were already known
+%% to the msg_store prior to T. However, it can be a little more
+%% complex: after the first R2, the refcount of msg 2 is 0. At that
+%% point, if a GC occurs or file deletion, msg 2 could vanish, which
+%% would then mean that the subsequent W2 and R2 are then ignored.
+%%
+%% The use case then for client_delete_and_terminate/1 is if the
+%% client wishes to remove everything it's written to the msg_store:
+%% it issues removes for all messages it's written and not removed,
+%% and then calls client_delete_and_terminate/1. At that point, any
+%% in-flight writes (and subsequent removes) can be ignored, but
+%% removes and writes for messages the msg_store already knows about
+%% will continue to be processed normally (which will normally just
+%% involve modifying the reference count, which is fast). Thus we save
+%% disk bandwidth for writes which are going to be immediately removed
+%% again by the terminating client.
+%%
+%% We use a separate set to keep track of the dying clients in order
+%% to keep that set, which is inspected on every write and remove, as
+%% small as possible. Inspecting the set of all clients would degrade
+%% performance with many healthy clients and few, if any, dying
+%% clients, which is the typical case.
+%%
+%% Client termination messages are stored in a separate ets index to
+%% avoid filling primary message store index and message files with
+%% client termination messages.
+%%
+%% When the msg_store has a backlog (i.e. it has unprocessed messages
+%% in its mailbox / gen_server priority queue), a further optimisation
+%% opportunity arises: we can eliminate pairs of 'write' and 'remove'
+%% from the same client for the same message. A typical occurrence of
+%% these is when an empty durable queue delivers persistent messages
+%% to ack'ing consumers. The queue will asynchronously ask the
+%% msg_store to 'write' such messages, and when they are acknowledged
+%% it will issue a 'remove'. That 'remove' may be issued before the
+%% msg_store has processed the 'write'. There is then no point going
+%% ahead with the processing of that 'write'.
+%%
+%% To detect this situation a 'flying_ets' table is shared between the
+%% clients and the server. The table is keyed on the combination of
+%% client (reference) and msg id, and the value represents an
+%% integration of all the writes and removes currently "in flight" for
+%% that message between the client and server - '+1' means all the
+%% writes/removes add up to a single 'write', '-1' to a 'remove', and
+%% '0' to nothing. (NB: the integration can never add up to more than
+%% one 'write' or 'read' since clients must not write/remove a message
+%% more than once without first removing/writing it).
+%%
+%% Maintaining this table poses two challenges: 1) both the clients
+%% and the server access and update the table, which causes
+%% concurrency issues, 2) we must ensure that entries do not stay in
+%% the table forever, since that would constitute a memory leak. We
+%% address the former by carefully modelling all operations as
+%% sequences of atomic actions that produce valid results in all
+%% possible interleavings. We address the latter by deleting table
+%% entries whenever the server finds a 0-valued entry during the
+%% processing of a write/remove. 0 is essentially equivalent to "no
+%% entry". If, OTOH, the value is non-zero we know there is at least
+%% one other 'write' or 'remove' in flight, so we get an opportunity
+%% later to delete the table entry when processing these.
+%%
+%% There are two further complications. We need to ensure that 1)
+%% eliminated writes still get confirmed, and 2) the write-back cache
+%% doesn't grow unbounded. These are quite straightforward to
+%% address. See the comments in the code.
+%%
+%% For notes on Clean Shutdown and startup, see documentation in
+%% rabbit_variable_queue.
+
+%%----------------------------------------------------------------------------
+%% public API
+%%----------------------------------------------------------------------------
+
+-spec start_link
+        (atom(), file:filename(), [binary()] | 'undefined',
+         {msg_ref_delta_gen(A), A}) -> rabbit_types:ok_pid_or_error().
+
+%% Start an anonymous (unregistered) message store server rooted at
+%% Dir.  ClientRefs is the list of client refs recovered from a clean
+%% shutdown, or 'undefined' to force a fresh start (see init/1);
+%% StartupFunState generates reference-count deltas when recovering
+%% from an unclean shutdown.
+start_link(Type, Dir, ClientRefs, StartupFunState) when is_atom(Type) ->
+    gen_server2:start_link(?MODULE,
+                           [Type, Dir, ClientRefs, StartupFunState],
+                           [{timeout, infinity}]).
+
+%% As start_link/4, but registers the server under the local name
+%% `Type' so the node-global store is reachable by atom.
+%% Spec added for consistency with start_link/4 above.
+-spec start_global_store_link
+        (atom(), file:filename(), [binary()] | 'undefined',
+         {msg_ref_delta_gen(A), A}) -> rabbit_types:ok_pid_or_error().
+
+start_global_store_link(Type, Dir, ClientRefs, StartupFunState) when is_atom(Type) ->
+    gen_server2:start_link({local, Type}, ?MODULE,
+                           [Type, Dir, ClientRefs, StartupFunState],
+                           [{timeout, infinity}]).
+
+-spec successfully_recovered_state(server()) -> boolean().
+
+%% True if the store recovered its message index and file summary
+%% from a clean shutdown (see init/1).
+successfully_recovered_state(Server) ->
+    gen_server2:call(Server, successfully_recovered_state, infinity).
+
+-spec client_init(server(), client_ref(), maybe_msg_id_fun(),
+                  maybe_close_fds_fun()) -> client_msstate().
+
+%% Register a new client with the store and build the client-side
+%% state giving direct access to the store's shared ETS tables and
+%% index.  MsgOnDiskFun and CloseFDsFun are retained server-side as
+%% callbacks (see the maybe_msg_id_fun()/maybe_close_fds_fun() types
+%% for their shapes).
+client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) when is_pid(Server); is_atom(Server) ->
+    {IState, IModule, Dir, GCPid,
+     FileHandlesEts, FileSummaryEts, CurFileCacheEts, FlyingEts} =
+        gen_server2:call(
+          Server, {new_client_state, Ref, self(), MsgOnDiskFun, CloseFDsFun},
+          infinity),
+    CreditDiscBound = rabbit_misc:get_env(rabbit, msg_store_credit_disc_bound,
+                                          ?CREDIT_DISC_BOUND),
+    #client_msstate { server             = Server,
+                      client_ref         = Ref,
+                      file_handle_cache  = #{},
+                      index_state        = IState,
+                      index_module       = IModule,
+                      dir                = Dir,
+                      gc_pid             = GCPid,
+                      file_handles_ets   = FileHandlesEts,
+                      file_summary_ets   = FileSummaryEts,
+                      cur_file_cache_ets = CurFileCacheEts,
+                      flying_ets         = FlyingEts,
+                      credit_disc_bound  = CreditDiscBound }.
+
+-spec client_terminate(client_msstate()) -> 'ok'.
+
+%% Synchronous deregistration: close our cached file handles, then
+%% wait until the server has processed everything we issued so far
+%% (see the module comment on client_terminate vs
+%% client_delete_and_terminate).
+client_terminate(CState = #client_msstate { client_ref = Ref }) ->
+    close_all_handles(CState),
+    ok = server_call(CState, {client_terminate, Ref}).
+
+-spec client_delete_and_terminate(client_msstate()) -> 'ok'.
+
+%% Asynchronous deregistration: mark the client as dying first so the
+%% server may safely ignore this client's still-in-flight writes and
+%% subsequent removes, then request deletion of its state.
+client_delete_and_terminate(CState = #client_msstate { client_ref = Ref }) ->
+    close_all_handles(CState),
+    ok = server_cast(CState, {client_dying, Ref}),
+    ok = server_cast(CState, {client_delete, Ref}).
+
+-spec client_ref(client_msstate()) -> client_ref().
+
+%% The unique reference this client registered under.
+client_ref(#client_msstate { client_ref = Ref }) -> Ref.
+
+-spec write_flow(rabbit_types:msg_id(), msg(), client_msstate()) -> 'ok'.
+
+%% As write/3, but first issues a credit-flow send towards the server
+%% so the producing process is throttled when the store falls behind.
+write_flow(MsgId, Msg,
+           CState = #client_msstate {
+                       server = Server,
+                       credit_disc_bound = CreditDiscBound }) ->
+    %% Here we are tracking messages sent by the
+    %% rabbit_amqqueue_process process via the
+    %% rabbit_variable_queue. We are accessing the
+    %% rabbit_amqqueue_process process dictionary.
+    credit_flow:send(Server, CreditDiscBound),
+    client_write(MsgId, Msg, flow, CState).
+
+-spec write(rabbit_types:msg_id(), msg(), client_msstate()) -> 'ok'.
+
+%% Write without credit flow; see write_flow/3 for the flow-controlled
+%% variant.
+write(MsgId, Msg, CState) -> client_write(MsgId, Msg, noflow, CState).
+
+-spec read(rabbit_types:msg_id(), client_msstate()) ->
+                     {rabbit_types:ok(msg()) | 'not_found', client_msstate()}.
+
+%% Client-side read: try the current-file write-back cache, then the
+%% index + direct file access (client_read1..3), and only fall back
+%% to a (slow) server call when neither is safe/possible.
+read(MsgId,
+     CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) ->
+    file_handle_cache_stats:update(msg_store_read),
+    %% Check the cur file cache
+    case ets:lookup(CurFileCacheEts, MsgId) of
+        [] ->
+            %% Defer: let the server perform the read for us.
+            Defer = fun() -> {server_call(CState, {read, MsgId}), CState} end,
+            case index_lookup_positive_ref_count(MsgId, CState) of
+                not_found   -> Defer();
+                MsgLocation -> client_read1(MsgLocation, Defer, CState)
+            end;
+        [{MsgId, Msg, _CacheRefCount}] ->
+            {{ok, Msg}, CState}
+    end.
+
+-spec contains(rabbit_types:msg_id(), client_msstate()) -> boolean().
+
+%% Ask the server whether MsgId is known.  Performed as a server call
+%% so, per the module comment, it cannot overtake in-flight removes.
+contains(MsgId, CState) -> server_call(CState, {contains, MsgId}).
+
+-spec remove([rabbit_types:msg_id()], client_msstate()) -> 'ok'.
+
+%% Asynchronously decrement the reference count of each message.
+%% Each '-1' is first integrated into the shared flying_ets table so
+%% the server can cancel it against a not-yet-processed write.
+remove([], _CState) -> ok;
+remove(MsgIds, CState = #client_msstate { client_ref = CRef }) ->
+    %% A foreach, not a comprehension: the per-message call is made
+    %% purely for its side effect, so avoid building an unused list.
+    ok = lists:foreach(
+           fun (MsgId) -> client_update_flying(-1, MsgId, CState) end,
+           MsgIds),
+    server_cast(CState, {remove, CRef, MsgIds}).
+
+-spec set_maximum_since_use(server(), non_neg_integer()) -> 'ok'.
+
+%% Callback registered with file_handle_cache in init/1; simply
+%% forwarded to the server asynchronously.
+set_maximum_since_use(Server, Age) when is_pid(Server); is_atom(Server) ->
+    gen_server2:cast(Server, {set_maximum_since_use, Age}).
+
+%%----------------------------------------------------------------------------
+%% Client-side-only helpers
+%%----------------------------------------------------------------------------
+
+%% Synchronous call to this client's store server process.
+server_call(#client_msstate { server = Server }, Msg) ->
+    gen_server2:call(Server, Msg, infinity).
+
+%% Asynchronous cast to this client's store server process.
+server_cast(#client_msstate { server = Server }, Msg) ->
+    gen_server2:cast(Server, Msg).
+
+%% Common write path: record the in-flight '+1' in flying_ets, make
+%% the message immediately readable via the current-file cache, then
+%% notify the server asynchronously (Flow is 'flow' or 'noflow').
+client_write(MsgId, Msg, Flow,
+             CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts,
+                                        client_ref = CRef }) ->
+    file_handle_cache_stats:update(msg_store_write),
+    ok = client_update_flying(+1, MsgId, CState),
+    ok = update_msg_cache(CurFileCacheEts, MsgId, Msg),
+    ok = server_cast(CState, {write, CRef, MsgId, Flow}).
+
+%% Stage 1 of a direct read: consult the file summary for the file
+%% the index says holds the message.
+client_read1(#msg_location { msg_id = MsgId, file = File } = MsgLocation, Defer,
+             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
+    case ets:lookup(FileSummaryEts, File) of
+        [] -> %% File has been GC'd and no longer exists. Go around again.
+            read(MsgId, CState),
+        [#file_summary { locked = Locked, right = Right }] ->
+            client_read2(Locked, Right, MsgLocation, Defer, CState)
+    end.
+
+%% Stage 2: decide between deferring to the server, retrying, or
+%% reading from disk ourselves, based on the file's locked flag and
+%% whether it is the current (rightmost, right = undefined) file.
+client_read2(false, undefined, _MsgLocation, Defer, _CState) ->
+    %% Although we've already checked both caches and not found the
+    %% message there, the message is apparently in the
+    %% current_file. We can only arrive here if we are trying to read
+    %% a message which we have not written, which is very odd, so just
+    %% defer.
+    %%
+    %% OR, on startup, the cur_file_cache is not populated with the
+    %% contents of the current file, thus reads from the current file
+    %% will end up here and will need to be deferred.
+    Defer();
+client_read2(true, _Right, _MsgLocation, Defer, _CState) ->
+    %% Of course, in the mean time, the GC could have run and our msg
+    %% is actually in a different file, unlocked. However, deferring is
+    %% the safest and simplest thing to do.
+    Defer();
+client_read2(false, _Right,
+             MsgLocation = #msg_location { msg_id = MsgId, file = File },
+             Defer,
+             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
+    %% It's entirely possible that everything we're doing from here on
+    %% is for the wrong file, or a non-existent file, as a GC may have
+    %% finished.
+    safe_ets_update_counter(
+      FileSummaryEts, File, {#file_summary.readers, +1},
+      fun (_) -> client_read3(MsgLocation, Defer, CState) end,
+      fun () -> read(MsgId, CState) end).
+
+%% Stage 3: we hold a +1 on the file's reader count.  Re-check the
+%% lock and the index (a GC may have run between stages), read from
+%% disk if still safe, and always release our reader count — waking
+%% the GC if we were the last reader of a locked file.
+client_read3(#msg_location { msg_id = MsgId, file = File }, Defer,
+             CState = #client_msstate { file_handles_ets = FileHandlesEts,
+                                        file_summary_ets = FileSummaryEts,
+                                        gc_pid = GCPid,
+                                        client_ref = Ref }) ->
+    Release =
+        fun() -> ok = case ets:update_counter(FileSummaryEts, File,
+                                              {#file_summary.readers, -1}) of
+                          0 -> case ets:lookup(FileSummaryEts, File) of
+                                   [#file_summary { locked = true }] ->
+                                       rabbit_msg_store_gc:no_readers(
+                                         GCPid, File);
+                                   _ -> ok
+                               end;
+                          _ -> ok
+                      end
+        end,
+    %% If a GC involving the file hasn't already started, it won't
+    %% start now. Need to check again to see if we've been locked in
+    %% the meantime, between lookup and update_counter (thus GC
+    %% started before our +1. In fact, it could have finished by now
+    %% too).
+    case ets:lookup(FileSummaryEts, File) of
+        [] -> %% GC has deleted our file, just go round again.
+            read(MsgId, CState);
+        [#file_summary { locked = true }] ->
+            %% If we get a badarg here, then the GC has finished and
+            %% deleted our file. Try going around again. Otherwise,
+            %% just defer.
+            %%
+            %% badarg scenario: we lookup, msg_store locks, GC starts,
+            %% GC ends, we +1 readers, msg_store ets:deletes (and
+            %% unlocks the dest)
+            try Release(),
+                 Defer()
+            catch error:badarg -> read(MsgId, CState)
+            end;
+        [#file_summary { locked = false }] ->
+            %% Ok, we're definitely safe to continue - a GC involving
+            %% the file cannot start up now, and isn't running, so
+            %% nothing will tell us from now on to close the handle if
+            %% it's already open.
+            %%
+            %% Finally, we need to recheck that the msg is still at
+            %% the same place - it's possible an entire GC ran between
+            %% us doing the lookup and the +1 on the readers. (Same as
+            %% badarg scenario above, but we don't have a missing file
+            %% - we just have the /wrong/ file).
+            case index_lookup(MsgId, CState) of
+                #msg_location { file = File } = MsgLocation ->
+                    %% Still the same file.
+                    {ok, CState1} = close_all_indicated(CState),
+                    %% We are now guaranteed that the mark_handle_open
+                    %% call will either insert_new correctly, or will
+                    %% fail, but find the value is open, not close.
+                    mark_handle_open(FileHandlesEts, File, Ref),
+                    %% Could the msg_store now mark the file to be
+                    %% closed? No: marks for closing are issued only
+                    %% when the msg_store has locked the file.
+                    %% This will never be the current file
+                    {Msg, CState2} = read_from_disk(MsgLocation, CState1),
+                    Release(), %% this MUST NOT fail with badarg
+                    {{ok, Msg}, CState2};
+                #msg_location {} = MsgLocation -> %% different file!
+                    Release(), %% this MUST NOT fail with badarg
+                    client_read1(MsgLocation, Defer, CState);
+                not_found -> %% it seems not to exist. Defer, just to be sure.
+                    try Release() %% this can badarg, same as locked case, above
+                    catch error:badarg -> ok
+                    end,
+                    Defer()
+            end
+    end.
+
+%% Integrate a +1 (write) / -1 (remove) into the shared flying_ets
+%% entry for {MsgId, CRef}.  After updating, the counter can only be
+%% 0 (a write and a remove cancelled out) or exactly Diff (the bound
+%% 'Diff' clause below matches only that value); anything else means
+%% the client wrote/removed the same message twice in a row, which
+%% the protocol forbids, so we throw.
+client_update_flying(Diff, MsgId, #client_msstate { flying_ets = FlyingEts,
+                                                    client_ref = CRef }) ->
+    Key = {MsgId, CRef},
+    case ets:insert_new(FlyingEts, {Key, Diff}) of
+        true -> ok;
+        false -> try ets:update_counter(FlyingEts, Key, {2, Diff}) of
+                     0 -> ok;
+                     Diff -> ok;
+                     Err -> throw({bad_flying_ets_update, Diff, Err, Key})
+                 catch error:badarg ->
+                         %% this is guaranteed to succeed since the
+                         %% server only removes and updates flying_ets
+                         %% entries; it never inserts them
+                         true = ets:insert_new(FlyingEts, {Key, Diff})
+                 end,
+                 ok
+    end.
+
+%% Forget all per-client bookkeeping for CRef: drop its pending
+%% message-confirm set and its dying-client entry (if any).
+clear_client(CRef, State = #msstate { cref_to_msg_ids = CTM,
+                                      dying_clients = DyingClients }) ->
+    CTM1 = maps:remove(CRef, CTM),
+    DyingClients1 = maps:remove(CRef, DyingClients),
+    State #msstate { cref_to_msg_ids = CTM1,
+                     dying_clients = DyingClients1 }.
+
+
+%%----------------------------------------------------------------------------
+%% gen_server callbacks
+%%----------------------------------------------------------------------------
+
+
+%% gen_server2 init: set up the store directory, recover (or rebuild)
+%% the file summary and message index, create the shared ETS tables,
+%% start the GC process, and open/truncate the current file.
+%% ClientRefs = undefined forces a fresh start (directory wiped).
+init([Type, BaseDir, ClientRefs, StartupFunState]) ->
+    process_flag(trap_exit, true),
+
+    ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use,
+                                             [self()]),
+
+    Dir = filename:join(BaseDir, atom_to_list(Type)),
+    Name = filename:join(filename:basename(BaseDir), atom_to_list(Type)),
+
+    {ok, IndexModule} = application:get_env(rabbit, msg_store_index_module),
+    rabbit_log:info("Message store ~tp: using ~p to provide index~n", [Name, IndexModule]),
+
+    AttemptFileSummaryRecovery =
+        case ClientRefs of
+            %% No client refs: wipe the directory and start fresh.
+            undefined -> ok = rabbit_file:recursive_delete([Dir]),
+                         ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
+                         false;
+            _         -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
+                         recover_crashed_compactions(Dir)
+        end,
+    %% if we found crashed compactions we trust neither the
+    %% file_summary nor the location index. Note the file_summary is
+    %% left empty here if it can't be recovered.
+    {FileSummaryRecovered, FileSummaryEts} =
+        recover_file_summary(AttemptFileSummaryRecovery, Dir),
+    {CleanShutdown, IndexState, ClientRefs1} =
+        recover_index_and_client_refs(IndexModule, FileSummaryRecovered,
+                                      ClientRefs, Dir, Name),
+    Clients = maps:from_list(
+                [{CRef, {undefined, undefined, undefined}} ||
+                    CRef <- ClientRefs1]),
+    %% CleanShutdown => msg location index and file_summary both
+    %% recovered correctly.
+    true = case {FileSummaryRecovered, CleanShutdown} of
+               {true, false} -> ets:delete_all_objects(FileSummaryEts);
+               _             -> true
+           end,
+    %% CleanShutdown <=> msg location index and file_summary both
+    %% recovered correctly.
+
+    FileHandlesEts  = ets:new(rabbit_msg_store_shared_file_handles,
+                              [ordered_set, public]),
+    CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]),
+    FlyingEts       = ets:new(rabbit_msg_store_flying, [set, public]),
+
+    {ok, FileSizeLimit} = application:get_env(rabbit, msg_store_file_size_limit),
+
+    {ok, GCPid} = rabbit_msg_store_gc:start_link(
+                    #gc_state { dir              = Dir,
+                                index_module     = IndexModule,
+                                index_state      = IndexState,
+                                file_summary_ets = FileSummaryEts,
+                                file_handles_ets = FileHandlesEts,
+                                msg_store        = self()
+                              }),
+
+    CreditDiscBound = rabbit_misc:get_env(rabbit, msg_store_credit_disc_bound,
+                                          ?CREDIT_DISC_BOUND),
+
+    State = #msstate { dir                    = Dir,
+                       index_module           = IndexModule,
+                       index_state            = IndexState,
+                       current_file           = 0,
+                       current_file_handle    = undefined,
+                       file_handle_cache      = #{},
+                       sync_timer_ref         = undefined,
+                       sum_valid_data         = 0,
+                       sum_file_size          = 0,
+                       pending_gc_completion  = maps:new(),
+                       gc_pid                 = GCPid,
+                       file_handles_ets       = FileHandlesEts,
+                       file_summary_ets       = FileSummaryEts,
+                       cur_file_cache_ets     = CurFileCacheEts,
+                       flying_ets             = FlyingEts,
+                       dying_clients          = #{},
+                       clients                = Clients,
+                       successfully_recovered = CleanShutdown,
+                       file_size_limit        = FileSizeLimit,
+                       cref_to_msg_ids        = #{},
+                       credit_disc_bound      = CreditDiscBound
+                     },
+    %% If we didn't recover the msg location index then we need to
+    %% rebuild it now.
+    Cleanliness = case CleanShutdown of
+                      true -> "clean";
+                      false -> "unclean"
+                  end,
+    rabbit_log:debug("Rebuilding message location index after ~s shutdown...~n",
+                     [Cleanliness]),
+    {Offset, State1 = #msstate { current_file = CurFile }} =
+        build_index(CleanShutdown, StartupFunState, State),
+    rabbit_log:debug("Finished rebuilding index~n", []),
+    %% read is only needed so that we can seek
+    {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile),
+                             [read | ?WRITE_MODE]),
+    %% Truncate the current file at the first byte past the last good
+    %% message so new writes append cleanly.
+    {ok, Offset} = file_handle_cache:position(CurHdl, Offset),
+    ok = file_handle_cache:truncate(CurHdl),
+
+    {ok, maybe_compact(State1 #msstate { current_file_handle = CurHdl }),
+     hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+%% gen_server2 call priorities: recovery queries and client
+%% registration jump the queue (7), reads are mildly prioritised (2),
+%% everything else runs at the default priority.
+prioritise_call(successfully_recovered_state, _From, _Len, _State) ->
+    7;
+prioritise_call({new_client_state, _Ref, _Pid, _MODC, _CloseFDsFun},
+                _From, _Len, _State) ->
+    7;
+prioritise_call({read, _MsgId}, _From, _Len, _State) ->
+    2;
+prioritise_call(_Msg, _From, _Len, _State) ->
+    0.
+
+%% gen_server2 cast priorities: GC completion notifications and
+%% fd-age housekeeping first (8), client-death notices next (7),
+%% ordinary writes/removes at the default priority.
+prioritise_cast({combine_files, _Source, _Destination, _Reclaimed},
+                _Len, _State) ->
+    8;
+prioritise_cast({delete_file, _File, _Reclaimed}, _Len, _State) ->
+    8;
+prioritise_cast({set_maximum_since_use, _Age}, _Len, _State) ->
+    8;
+prioritise_cast({client_dying, _Pid}, _Len, _State) ->
+    7;
+prioritise_cast(_Msg, _Len, _State) ->
+    0.
+
+%% gen_server2 info priorities: the internal 'sync' tick beats all
+%% other info messages.
+prioritise_info(sync, _Len, _State) ->
+    8;
+prioritise_info(_Msg, _Len, _State) ->
+    0.
+
+%% Synchronous API.
+%%
+%% successfully_recovered_state: report whether the last shutdown was
+%% clean (index and file summary both recovered).
+handle_call(successfully_recovered_state, _From, State) ->
+ reply(State #msstate.successfully_recovered, State);
+
+%% Register a new client: record its callbacks, monitor its process
+%% (see the 'DOWN' clause in handle_info/2), and hand back the shared
+%% state and ETS tables it needs for client-side reads.
+handle_call({new_client_state, CRef, CPid, MsgOnDiskFun, CloseFDsFun}, _From,
+ State = #msstate { dir = Dir,
+ index_state = IndexState,
+ index_module = IndexModule,
+ file_handles_ets = FileHandlesEts,
+ file_summary_ets = FileSummaryEts,
+ cur_file_cache_ets = CurFileCacheEts,
+ flying_ets = FlyingEts,
+ clients = Clients,
+ gc_pid = GCPid }) ->
+ Clients1 = maps:put(CRef, {CPid, MsgOnDiskFun, CloseFDsFun}, Clients),
+ erlang:monitor(process, CPid),
+ reply({IndexState, IndexModule, Dir, GCPid, FileHandlesEts, FileSummaryEts,
+ CurFileCacheEts, FlyingEts},
+ State #msstate { clients = Clients1 });
+
+%% Orderly client shutdown: drop per-client state for CRef.
+handle_call({client_terminate, CRef}, _From, State) ->
+ reply(ok, clear_client(CRef, State));
+
+%% Server-side read. The reply is issued from read_message/3, possibly
+%% deferred until a GC holding the message's file completes.
+handle_call({read, MsgId}, From, State) ->
+ State1 = read_message(MsgId, From, State),
+ noreply(State1);
+
+%% Membership test; like read, the reply may be deferred until any
+%% pending GC on the message's file has completed.
+handle_call({contains, MsgId}, From, State) ->
+ State1 = contains_message(MsgId, From, State),
+ noreply(State1).
+
+%% A client is about to die: remember the current file/offset so that
+%% should_mask_action/3 can tell which of its messages predate the
+%% death marker.
+handle_cast({client_dying, CRef},
+ State = #msstate { dying_clients = DyingClients,
+ current_file_handle = CurHdl,
+ current_file = CurFile }) ->
+ {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl),
+ DyingClients1 = maps:put(CRef,
+ #dying_client{client_ref = CRef,
+ file = CurFile,
+ offset = CurOffset},
+ DyingClients),
+ noreply(State #msstate { dying_clients = DyingClients1 });
+
+%% Final removal of a (dying) client's registration and state.
+handle_cast({client_delete, CRef},
+ State = #msstate { clients = Clients }) ->
+ State1 = State #msstate { clients = maps:remove(CRef, Clients) },
+ noreply(clear_client(CRef, State1));
+
+%% Process a client write. The message body travels via the
+%% cur_file_cache_ets table (written client-side); flying_ets decides
+%% whether this write has already been cancelled by a later remove.
+handle_cast({write, CRef, MsgId, Flow},
+ State = #msstate { cur_file_cache_ets = CurFileCacheEts,
+ clients = Clients,
+ credit_disc_bound = CreditDiscBound }) ->
+ case Flow of
+ flow -> {CPid, _, _} = maps:get(CRef, Clients),
+ %% We are going to process a message sent by the
+ %% rabbit_amqqueue_process. Now we are accessing the
+ %% msg_store process dictionary.
+ credit_flow:ack(CPid, CreditDiscBound);
+ noflow -> ok
+ end,
+ true = 0 =< ets:update_counter(CurFileCacheEts, MsgId, {3, -1}),
+ case update_flying(-1, MsgId, CRef, State) of
+ process ->
+ [{MsgId, Msg, _PWC}] = ets:lookup(CurFileCacheEts, MsgId),
+ noreply(write_message(MsgId, Msg, CRef, State));
+ ignore ->
+ %% A 'remove' has already been issued and eliminated the
+ %% 'write'.
+ State1 = blind_confirm(CRef, gb_sets:singleton(MsgId),
+ ignored, State),
+ %% If all writes get eliminated, cur_file_cache_ets could
+ %% grow unbounded. To prevent that we delete the cache
+ %% entry here, but only if the message isn't in the
+ %% current file. That way reads of the message can
+ %% continue to be done client side, from either the cache
+ %% or the non-current files. If the message *is* in the
+ %% current file then the cache entry will be removed by
+ %% the normal logic for that in write_message/4 and
+ %% maybe_roll_to_new_file/2.
+ case index_lookup(MsgId, State1) of
+ [#msg_location { file = File }]
+ when File == State1 #msstate.current_file ->
+ ok;
+ _ ->
+ true = ets:match_delete(CurFileCacheEts, {MsgId, '_', 0})
+ end,
+ noreply(State1)
+ end;
+
+%% Process a batch of removes; each one may itself be cancelled out by
+%% an in-flight write (update_flying), and successfully removed ids are
+%% confirmed back to the client as 'ignored'.
+handle_cast({remove, CRef, MsgIds}, State) ->
+ {RemovedMsgIds, State1} =
+ lists:foldl(
+ fun (MsgId, {Removed, State2}) ->
+ case update_flying(+1, MsgId, CRef, State2) of
+ process -> {[MsgId | Removed],
+ remove_message(MsgId, CRef, State2)};
+ ignore -> {Removed, State2}
+ end
+ end, {[], State}, MsgIds),
+ noreply(maybe_compact(client_confirm(CRef, gb_sets:from_list(RemovedMsgIds),
+ ignored, State1)));
+
+%% GC finished combining Source into Destination: clean up the source
+%% file, unlock the destination and replay any parked operations.
+handle_cast({combine_files, Source, Destination, Reclaimed},
+ State = #msstate { sum_file_size = SumFileSize,
+ file_handles_ets = FileHandlesEts,
+ file_summary_ets = FileSummaryEts,
+ clients = Clients }) ->
+ ok = cleanup_after_file_deletion(Source, State),
+ %% see comment in cleanup_after_file_deletion, and client_read3
+ true = mark_handle_to_close(Clients, FileHandlesEts, Destination, false),
+ true = ets:update_element(FileSummaryEts, Destination,
+ {#file_summary.locked, false}),
+ State1 = State #msstate { sum_file_size = SumFileSize - Reclaimed },
+ noreply(maybe_compact(run_pending([Source, Destination], State1)));
+
+%% GC finished deleting a whole (empty) file.
+handle_cast({delete_file, File, Reclaimed},
+ State = #msstate { sum_file_size = SumFileSize }) ->
+ ok = cleanup_after_file_deletion(File, State),
+ State1 = State #msstate { sum_file_size = SumFileSize - Reclaimed },
+ noreply(maybe_compact(run_pending([File], State1)));
+
+%% file_handle_cache housekeeping: drop handles unused for >= Age.
+handle_cast({set_maximum_since_use, Age}, State) ->
+ ok = file_handle_cache:set_maximum_since_use(Age),
+ noreply(State).
+
+%% Both the explicit 'sync' tick (from the sync timer) and the
+%% gen_server timeout funnel into internal_sync/1, which fsyncs the
+%% current file and releases pending confirms.
+handle_info(sync, State) ->
+ noreply(internal_sync(State));
+
+handle_info(timeout, State) ->
+ noreply(internal_sync(State));
+
+%% A monitored client process died (monitor set up in the
+%% new_client_state call): release any credit_flow state held for it.
+handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) ->
+ %% similar to what happens in
+ %% rabbit_amqqueue_process:handle_ch_down but with a relation of
+ %% msg_store -> rabbit_amqqueue_process instead of
+ %% rabbit_amqqueue_process -> rabbit_channel.
+ credit_flow:peer_down(Pid),
+ noreply(State);
+
+%% Linked process exited: stop with the same reason.
+handle_info({'EXIT', _Pid, Reason}, State) ->
+ {stop, Reason, State}.
+
+%% Orderly shutdown: stop the GC helper, sync and close the current
+%% file, persist the file summary and recovery terms (these mark the
+%% next startup as a clean recovery), then drop the ETS tables and
+%% terminate the index. The ordering here is deliberate - see the
+%% inline comments.
+terminate(_Reason, State = #msstate { index_state = IndexState,
+ index_module = IndexModule,
+ current_file_handle = CurHdl,
+ gc_pid = GCPid,
+ file_handles_ets = FileHandlesEts,
+ file_summary_ets = FileSummaryEts,
+ cur_file_cache_ets = CurFileCacheEts,
+ flying_ets = FlyingEts,
+ clients = Clients,
+ dir = Dir }) ->
+ rabbit_log:info("Stopping message store for directory '~s'", [Dir]),
+ %% stop the gc first, otherwise it could be working and we pull
+ %% out the ets tables from under it.
+ ok = rabbit_msg_store_gc:stop(GCPid),
+ State1 = case CurHdl of
+ undefined -> State;
+ _ -> State2 = internal_sync(State),
+ ok = file_handle_cache:close(CurHdl),
+ State2
+ end,
+ State3 = close_all_handles(State1),
+ %% Failures to persist summary/recovery terms are logged but not
+ %% fatal: the next startup simply falls back to a full rebuild.
+ case store_file_summary(FileSummaryEts, Dir) of
+ ok -> ok;
+ {error, FSErr} ->
+ rabbit_log:error("Unable to store file summary"
+ " for vhost message store for directory ~p~n"
+ "Error: ~p~n",
+ [Dir, FSErr])
+ end,
+ [true = ets:delete(T) || T <- [FileSummaryEts, FileHandlesEts,
+ CurFileCacheEts, FlyingEts]],
+ IndexModule:terminate(IndexState),
+ case store_recovery_terms([{client_refs, maps:keys(Clients)},
+ {index_module, IndexModule}], Dir) of
+ ok ->
+ rabbit_log:info("Message store for directory '~s' is stopped", [Dir]),
+ ok;
+ {error, RTErr} ->
+ rabbit_log:error("Unable to save message store recovery terms"
+ " for directory ~p~nError: ~p~n",
+ [Dir, RTErr])
+ end,
+ State3 #msstate { index_state = undefined,
+ current_file_handle = undefined }.
+
+%% No state migration needed across code upgrades.
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%% gen_server2 hook: pretty-print the mailbox in crash/error reports.
+format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).
+
+%%----------------------------------------------------------------------------
+%% general helper functions
+%%----------------------------------------------------------------------------
+
+%% Wrap a state transition into a gen_server2 noreply tuple, with the
+%% timeout/hibernate hint computed by next_state/1.
+noreply(State0) ->
+    {NextState, IdleTimeout} = next_state(State0),
+    {noreply, NextState, IdleTimeout}.
+
+%% As noreply/1, but carrying a reply term back to the caller.
+reply(Reply, State0) ->
+    {NextState, IdleTimeout} = next_state(State0),
+    {reply, Reply, NextState, IdleTimeout}.
+
+%% Choose the next gen_server2 timeout. With confirms outstanding we
+%% want a sync soon: ensure the sync timer is running and return a zero
+%% timeout. With none outstanding we can stop the timer and hibernate.
+next_state(#msstate { sync_timer_ref = undefined,
+                      cref_to_msg_ids = CTM } = State) ->
+    case map_size(CTM) of
+        0 -> {State, hibernate};
+        _ -> {start_sync_timer(State), 0}
+    end;
+next_state(#msstate { cref_to_msg_ids = CTM } = State) ->
+    case map_size(CTM) of
+        0 -> {stop_sync_timer(State), hibernate};
+        _ -> {State, 0}
+    end.
+
+%% (Re)arm the timer that delivers the 'sync' info message after
+%% ?SYNC_INTERVAL; stored in #msstate.sync_timer_ref.
+start_sync_timer(State) ->
+ rabbit_misc:ensure_timer(State, #msstate.sync_timer_ref,
+ ?SYNC_INTERVAL, sync).
+
+%% Cancel the sync timer, if running.
+stop_sync_timer(State) ->
+ rabbit_misc:stop_timer(State, #msstate.sync_timer_ref).
+
+%% Flush the current file to disk (only if any client actually has
+%% unconfirmed writes) and then deliver 'written' confirms for every
+%% pending message-id set.
+internal_sync(State = #msstate { current_file_handle = CurHdl,
+ cref_to_msg_ids = CTM }) ->
+ State1 = stop_sync_timer(State),
+ CGs = maps:fold(fun (CRef, MsgIds, NS) ->
+ case gb_sets:is_empty(MsgIds) of
+ true -> NS;
+ false -> [{CRef, MsgIds} | NS]
+ end
+ end, [], CTM),
+ ok = case CGs of
+ [] -> ok;
+ _ -> file_handle_cache:sync(CurHdl)
+ end,
+ lists:foldl(fun ({CRef, MsgIds}, StateN) ->
+ client_confirm(CRef, MsgIds, written, StateN)
+ end, State1, CGs).
+
+%% Decide whether a queued operation should be processed or has been
+%% cancelled out by the opposite in-flight operation. Diff is -1 when
+%% the server processes a write and +1 when it processes a remove (see
+%% the handle_cast clauses). Returns 'process' or 'ignore'.
+update_flying(Diff, MsgId, CRef, #msstate { flying_ets = FlyingEts }) ->
+ Key = {MsgId, CRef},
+ NDiff = -Diff,
+ case ets:lookup(FlyingEts, Key) of
+ [] -> ignore;
+ [{_, Diff}] -> ignore; %% [1]
+ [{_, NDiff}] -> ets:update_counter(FlyingEts, Key, {2, Diff}),
+ true = ets:delete_object(FlyingEts, {Key, 0}),
+ process;
+ [{_, 0}] -> true = ets:delete_object(FlyingEts, {Key, 0}),
+ ignore;
+ [{_, Err}] -> throw({bad_flying_ets_record, Diff, Err, Key})
+ end.
+%% [1] We can get here, for example, in the following scenario: There
+%% is a write followed by a remove in flight. The counter will be 0,
+%% so on processing the write the server attempts to delete the
+%% entry. If at that point the client injects another write it will
+%% either insert a new entry, containing +1, or increment the existing
+%% entry to +1, thus preventing its removal. Either way therefore when
+%% the server processes the read, the counter will be +1.
+
+%% Given the masking decision from should_mask_action/3 and the index
+%% state for MsgId, decide what a write should do: {write, State}
+%% (append to the current file), {ignore, File, State} (drop it), or
+%% {confirm, File, State} (already on disk; just bump the ref count
+%% and confirm).
+write_action({true, not_found}, _MsgId, State) ->
+ {ignore, undefined, State};
+write_action({true, #msg_location { file = File }}, _MsgId, State) ->
+ {ignore, File, State};
+write_action({false, not_found}, _MsgId, State) ->
+ {write, State};
+write_action({Mask, #msg_location { ref_count = 0, file = File,
+ total_size = TotalSize }},
+ MsgId, State = #msstate { file_summary_ets = FileSummaryEts }) ->
+ case {Mask, ets:lookup(FileSummaryEts, File)} of
+ {false, [#file_summary { locked = true }]} ->
+ ok = index_delete(MsgId, State),
+ {write, State};
+ {false_if_increment, [#file_summary { locked = true }]} ->
+ %% The msg for MsgId is older than the client death
+ %% message, but as it is being GC'd currently we'll have
+ %% to write a new copy, which will then be younger, so
+ %% ignore this write.
+ {ignore, File, State};
+ {_Mask, [#file_summary {}]} ->
+ ok = index_update_ref_count(MsgId, 1, State),
+ State1 = adjust_valid_total_size(File, TotalSize, State),
+ {confirm, File, State1}
+ end;
+write_action({_Mask, #msg_location { ref_count = RefCount, file = File }},
+ MsgId, State) ->
+ ok = index_update_ref_count(MsgId, RefCount + 1, State),
+ %% We already know about it, just update counter. Only update
+ %% field otherwise bad interaction with concurrent GC
+ {confirm, File, State}.
+
+%% Entry point for a server-side write on behalf of client CRef.
+%% Dispatches on write_action/3; cache entries are only deleted when
+%% the message ends up outside the current file (see the comment in the
+%% handle_cast {write, ...} clause).
+write_message(MsgId, Msg, CRef,
+ State = #msstate { cur_file_cache_ets = CurFileCacheEts }) ->
+ case write_action(should_mask_action(CRef, MsgId, State), MsgId, State) of
+ {write, State1} ->
+ write_message(MsgId, Msg,
+ record_pending_confirm(CRef, MsgId, State1));
+ {ignore, CurFile, State1 = #msstate { current_file = CurFile }} ->
+ State1;
+ {ignore, _File, State1} ->
+ true = ets:delete_object(CurFileCacheEts, {MsgId, Msg, 0}),
+ State1;
+ {confirm, CurFile, State1 = #msstate { current_file = CurFile }}->
+ record_pending_confirm(CRef, MsgId, State1);
+ {confirm, _File, State1} ->
+ true = ets:delete_object(CurFileCacheEts, {MsgId, Msg, 0}),
+ %% Already safely on disk in a non-current file: confirm
+ %% immediately rather than waiting for the next sync.
+ update_pending_confirms(
+ fun (MsgOnDiskFun, CTM) ->
+ MsgOnDiskFun(gb_sets:singleton(MsgId), written),
+ CTM
+ end, CRef, State1)
+ end.
+
+%% Drop one reference to MsgId on behalf of CRef. A refcount of 1
+%% means this is the last reference: the entry's size is deducted from
+%% the file's valid total and the file deleted if now empty - unless
+%% the file is locked by GC, in which case the remove is parked until
+%% the GC completes.
+remove_message(MsgId, CRef,
+ State = #msstate { file_summary_ets = FileSummaryEts }) ->
+ case should_mask_action(CRef, MsgId, State) of
+ {true, _Location} ->
+ State;
+ {false_if_increment, #msg_location { ref_count = 0 }} ->
+ %% CRef has tried to both write and remove this msg whilst
+ %% it's being GC'd.
+ %%
+ %% ASSERTION: [#file_summary { locked = true }] =
+ %% ets:lookup(FileSummaryEts, File),
+ State;
+ {_Mask, #msg_location { ref_count = RefCount, file = File,
+ total_size = TotalSize }}
+ when RefCount > 0 ->
+ %% only update field, otherwise bad interaction with
+ %% concurrent GC
+ Dec = fun () -> index_update_ref_count(
+ MsgId, RefCount - 1, State) end,
+ case RefCount of
+ %% don't remove from cur_file_cache_ets here because
+ %% there may be further writes in the mailbox for the
+ %% same msg.
+ 1 -> case ets:lookup(FileSummaryEts, File) of
+ [#file_summary { locked = true }] ->
+ add_to_pending_gc_completion(
+ {remove, MsgId, CRef}, File, State);
+ [#file_summary {}] ->
+ ok = Dec(),
+ delete_file_if_empty(
+ File, adjust_valid_total_size(
+ File, -TotalSize, State))
+ end;
+ _ -> ok = Dec(),
+ State
+ end
+ end.
+
+%% Append Msg to the current file at the current offset, insert its
+%% location into the index with ref_count 1, bump the file-summary
+%% size counters, and roll to a new file if the size limit is reached.
+%% The match on the file summary asserts the current file is the
+%% right-most and not locked by GC.
+write_message(MsgId, Msg,
+ State = #msstate { current_file_handle = CurHdl,
+ current_file = CurFile,
+ sum_valid_data = SumValid,
+ sum_file_size = SumFileSize,
+ file_summary_ets = FileSummaryEts }) ->
+ {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl),
+ {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg),
+ ok = index_insert(
+ #msg_location { msg_id = MsgId, ref_count = 1, file = CurFile,
+ offset = CurOffset, total_size = TotalSize }, State),
+ [#file_summary { right = undefined, locked = false }] =
+ ets:lookup(FileSummaryEts, CurFile),
+ [_,_] = ets:update_counter(FileSummaryEts, CurFile,
+ [{#file_summary.valid_total_size, TotalSize},
+ {#file_summary.file_size, TotalSize}]),
+ maybe_roll_to_new_file(CurOffset + TotalSize,
+ State #msstate {
+ sum_valid_data = SumValid + TotalSize,
+ sum_file_size = SumFileSize + TotalSize }).
+
+%% Look the message up in the index. Unknown or zero-refcount ids get
+%% an immediate not_found reply; otherwise read_message1/3 takes over
+%% (and sends the reply itself, possibly after a pending GC).
+read_message(MsgId, From, State) ->
+    case index_lookup_positive_ref_count(MsgId, State) of
+        not_found ->
+            gen_server2:reply(From, not_found),
+            State;
+        MsgLocation ->
+            read_message1(From, MsgLocation, State)
+    end.
+
+%% Serve a read given a known msg location. Messages in the current
+%% file are served from the write cache when possible, flushing the
+%% write buffer first if the data may not have hit the file yet.
+%% Messages in other files are read directly unless that file is
+%% locked by GC, in which case the read is parked until GC completes.
+read_message1(From, #msg_location { msg_id = MsgId, file = File,
+ offset = Offset } = MsgLoc,
+ State = #msstate { current_file = CurFile,
+ current_file_handle = CurHdl,
+ file_summary_ets = FileSummaryEts,
+ cur_file_cache_ets = CurFileCacheEts }) ->
+ case File =:= CurFile of
+ true -> {Msg, State1} =
+ %% can return [] if msg in file existed on startup
+ case ets:lookup(CurFileCacheEts, MsgId) of
+ [] ->
+ {ok, RawOffSet} =
+ file_handle_cache:current_raw_offset(CurHdl),
+ ok = case Offset >= RawOffSet of
+ true -> file_handle_cache:flush(CurHdl);
+ false -> ok
+ end,
+ read_from_disk(MsgLoc, State);
+ [{MsgId, Msg1, _CacheRefCount}] ->
+ {Msg1, State}
+ end,
+ gen_server2:reply(From, {ok, Msg}),
+ State1;
+ false -> [#file_summary { locked = Locked }] =
+ ets:lookup(FileSummaryEts, File),
+ case Locked of
+ true -> add_to_pending_gc_completion({read, MsgId, From},
+ File, State);
+ false -> {Msg, State1} = read_from_disk(MsgLoc, State),
+ gen_server2:reply(From, {ok, Msg}),
+ State1
+ end
+ end.
+
+%% Read one message straight off disk at its recorded offset. A
+%% mismatch between the id read back and the expected MsgId produces a
+%% deliberate badmatch carrying a detailed diagnostic term.
+read_from_disk(#msg_location { msg_id = MsgId, file = File, offset = Offset,
+ total_size = TotalSize }, State) ->
+ {Hdl, State1} = get_read_handle(File, State),
+ {ok, Offset} = file_handle_cache:position(Hdl, Offset),
+ {ok, {MsgId, Msg}} =
+ case rabbit_msg_file:read(Hdl, TotalSize) of
+ {ok, {MsgId, _}} = Obj ->
+ Obj;
+ Rest ->
+ {error, {misread, [{old_state, State},
+ {file_num, File},
+ {offset, Offset},
+ {msg_id, MsgId},
+ {read, Rest},
+ {proc_dict, get()}
+ ]}}
+ end,
+ {Msg, State1}.
+
+%% Reply true/false for a membership query; if the message's file has
+%% operations pending on GC completion, park this query behind them so
+%% the answer reflects the post-GC state.
+contains_message(MsgId, From,
+ State = #msstate { pending_gc_completion = Pending }) ->
+ case index_lookup_positive_ref_count(MsgId, State) of
+ not_found ->
+ gen_server2:reply(From, false),
+ State;
+ #msg_location { file = File } ->
+ case maps:is_key(File, Pending) of
+ true -> add_to_pending_gc_completion(
+ {contains, MsgId, From}, File, State);
+ false -> gen_server2:reply(From, true),
+ State
+ end
+ end.
+
+%% Park an operation (read/contains/remove) against File until the GC
+%% working on that file completes; run_pending/2 replays them in
+%% submission order afterwards.
+add_to_pending_gc_completion(Op, File,
+                             State = #msstate { pending_gc_completion = Pending }) ->
+    Pending1 = rabbit_misc:maps_cons(File, Op, Pending),
+    State #msstate { pending_gc_completion = Pending1 }.
+
+%% Replay every operation parked against the given files now that GC
+%% on them has completed. The per-file lists were consed newest-first,
+%% hence the reverse to restore submission order.
+run_pending(Files, State) ->
+ lists:foldl(
+ fun (File, State1 = #msstate { pending_gc_completion = Pending }) ->
+ Pending1 = maps:remove(File, Pending),
+ lists:foldl(
+ fun run_pending_action/2,
+ State1 #msstate { pending_gc_completion = Pending1 },
+ lists:reverse(maps:get(File, Pending)))
+ end, State, Files).
+
+%% Dispatch one parked operation back through its normal code path.
+run_pending_action({read, MsgId, From}, State) ->
+ read_message(MsgId, From, State);
+run_pending_action({contains, MsgId, From}, State) ->
+ contains_message(MsgId, From, State);
+run_pending_action({remove, MsgId, CRef}, State) ->
+ remove_message(MsgId, CRef, State).
+
+%% Run ets:update_counter/3 and feed its result to SuccessFun; if the
+%% counter update fails with badarg (missing table or key), call
+%% FailThunk instead.
+%%
+%% Only the ets call sits inside the protected region (try ... of):
+%% previously SuccessFun ran inside the try body, so a badarg raised
+%% by SuccessFun itself was silently misreported as a missing key and
+%% diverted to FailThunk.
+safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) ->
+    try ets:update_counter(Tab, Key, UpdateOp) of
+        Result -> SuccessFun(Result)
+    catch
+        error:badarg -> FailThunk()
+    end.
+
+%% Insert Msg into the cache with refcount 1, or bump the refcount if
+%% an entry already exists. If the bump races with a concurrent
+%% delete (badarg from update_counter), retry the insert from scratch.
+update_msg_cache(CacheEts, MsgId, Msg) ->
+ case ets:insert_new(CacheEts, {MsgId, Msg, 1}) of
+ true -> ok;
+ false -> safe_ets_update_counter(
+ CacheEts, MsgId, {3, +1}, fun (_) -> ok end,
+ fun () -> update_msg_cache(CacheEts, MsgId, Msg) end)
+ end.
+
+%% Apply Delta (positive or negative) to both the per-file
+%% valid_total_size counter in the file summary table and the global
+%% sum_valid_data total.
+adjust_valid_total_size(File, Delta, State = #msstate {
+ sum_valid_data = SumValid,
+ file_summary_ets = FileSummaryEts }) ->
+ [_] = ets:update_counter(FileSummaryEts, File,
+ [{#file_summary.valid_total_size, Delta}]),
+ State #msstate { sum_valid_data = SumValid + Delta }.
+
+%% Insert-only variant of maps:put/3: asserts (via badmatch) that Key
+%% is not already present before adding it.
+maps_store(Key, Value, Map) ->
+    false = maps:is_key(Key, Map),
+    Map#{Key => Value}.
+
+%% Apply Fun(MsgOnDiskFun, CTM) to the pending-confirm map, but only
+%% for clients that registered a confirm callback; clients with an
+%% undefined callback never accumulate confirm state.
+update_pending_confirms(Fun, CRef,
+ State = #msstate { clients = Clients,
+ cref_to_msg_ids = CTM }) ->
+ case maps:get(CRef, Clients) of
+ {_CPid, undefined, _CloseFDsFun} -> State;
+ {_CPid, MsgOnDiskFun, _CloseFDsFun} -> CTM1 = Fun(MsgOnDiskFun, CTM),
+ State #msstate {
+ cref_to_msg_ids = CTM1 }
+ end.
+
+%% Add MsgId to CRef's set of writes awaiting the next sync; the
+%% confirm itself is delivered later by internal_sync/1 /
+%% client_confirm/4.
+record_pending_confirm(CRef, MsgId, State) ->
+ update_pending_confirms(
+ fun (_MsgOnDiskFun, CTM) ->
+ NewMsgIds = case maps:find(CRef, CTM) of
+ error -> gb_sets:singleton(MsgId);
+ {ok, MsgIds} -> gb_sets:add(MsgId, MsgIds)
+ end,
+ maps:put(CRef, NewMsgIds, CTM)
+ end, CRef, State).
+
+%% Deliver ActionTaken ('written' or 'ignored') for the intersection
+%% of MsgIds with CRef's pending set, and shrink (or remove) that set
+%% accordingly.
+client_confirm(CRef, MsgIds, ActionTaken, State) ->
+ update_pending_confirms(
+ fun (MsgOnDiskFun, CTM) ->
+ case maps:find(CRef, CTM) of
+ {ok, Gs} -> MsgOnDiskFun(gb_sets:intersection(Gs, MsgIds),
+ ActionTaken),
+ MsgIds1 = rabbit_misc:gb_sets_difference(
+ Gs, MsgIds),
+ case gb_sets:is_empty(MsgIds1) of
+ true -> maps:remove(CRef, CTM);
+ false -> maps:put(CRef, MsgIds1, CTM)
+ end;
+ error -> CTM
+ end
+ end, CRef, State).
+
+%% Deliver ActionTaken for MsgIds without consulting or altering the
+%% pending-confirm sets (used when a write was eliminated outright).
+blind_confirm(CRef, MsgIds, ActionTaken, State) ->
+ update_pending_confirms(
+ fun (MsgOnDiskFun, CTM) -> MsgOnDiskFun(MsgIds, ActionTaken), CTM end,
+ CRef, State).
+
+%% Detect whether the MsgId is older or younger than the client's death
+%% msg (if there is one). If the msg is older than the client death
+%% msg, and it has a 0 ref_count we must only alter the ref_count, not
+%% rewrite the msg - rewriting it would make it younger than the death
+%% msg and thus should be ignored. Note that this (correctly) returns
+%% false when testing to remove the death msg itself.
+should_mask_action(CRef, MsgId,
+ State = #msstate{dying_clients = DyingClients}) ->
+ case {maps:find(CRef, DyingClients), index_lookup(MsgId, State)} of
+ {error, Location} ->
+ {false, Location};
+ {{ok, _}, not_found} ->
+ {true, not_found};
+ {{ok, Client}, #msg_location { file = File, offset = Offset,
+ ref_count = RefCount } = Location} ->
+ #dying_client{file = DeathFile, offset = DeathOffset} = Client,
+ {case {{DeathFile, DeathOffset} < {File, Offset}, RefCount} of
+ {true, _} -> true;
+ {false, 0} -> false_if_increment;
+ {false, _} -> false
+ end, Location}
+ end.
+
+%%----------------------------------------------------------------------------
+%% file helper functions
+%%----------------------------------------------------------------------------
+
+%% Open File (absolute path) in binary mode with the configured
+%% read/write buffer sizes, via the file_handle_cache.
+open_file(File, Mode) ->
+ file_handle_cache:open_with_absolute_path(
+ File, ?BINARY_MODE ++ Mode,
+ [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE},
+ {read_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]).
+
+%% Convenience variant taking a directory and bare file name.
+open_file(Dir, FileName, Mode) ->
+ open_file(form_filename(Dir, FileName), Mode).
+
+%% Close and forget the cached read handle for Key (a file number) in
+%% either a client's or the server's handle cache; a no-op if absent.
+close_handle(Key, CState = #client_msstate { file_handle_cache = FHC }) ->
+ CState #client_msstate { file_handle_cache = close_handle(Key, FHC) };
+
+close_handle(Key, State = #msstate { file_handle_cache = FHC }) ->
+ State #msstate { file_handle_cache = close_handle(Key, FHC) };
+
+close_handle(Key, FHC) ->
+ case maps:find(Key, FHC) of
+ {ok, Hdl} -> ok = file_handle_cache:close(Hdl),
+ maps:remove(Key, FHC);
+ error -> FHC
+ end.
+
+%% Record in the shared table that client Ref has File open.
+mark_handle_open(FileHandlesEts, File, Ref) ->
+ %% This is fine to fail (already exists). Note it could fail with
+ %% the value being close, and not have it updated to open.
+ ets:insert_new(FileHandlesEts, {{Ref, File}, open}),
+ true.
+
+%% See comment in client_read3 - only call this when the file is locked
+%% Flip every 'open' entry for File to 'close'; when Invoke is true,
+%% also call each affected client's CloseFDsFun callback (if it
+%% registered one).
+mark_handle_to_close(ClientRefs, FileHandlesEts, File, Invoke) ->
+ [ begin
+ case (ets:update_element(FileHandlesEts, Key, {2, close})
+ andalso Invoke) of
+ true -> case maps:get(Ref, ClientRefs) of
+ {_CPid, _MsgOnDiskFun, undefined} ->
+ ok;
+ {_CPid, _MsgOnDiskFun, CloseFDsFun} ->
+ ok = CloseFDsFun()
+ end;
+ false -> ok
+ end
+ end || {{Ref, _File} = Key, open} <-
+ ets:match_object(FileHandlesEts, {{'_', File}, open}) ],
+ true.
+
+%% Thunk wrapper so callers can defer the deletion decision.
+safe_file_delete_fun(File, Dir, FileHandlesEts) ->
+ fun () -> safe_file_delete(File, Dir, FileHandlesEts) end.
+
+%% Delete File from disk only once no client still has a handle entry
+%% for it; returns true if deleted, false otherwise.
+safe_file_delete(File, Dir, FileHandlesEts) ->
+ %% do not match on any value - it's the absence of the row that
+ %% indicates the client has really closed the file.
+ case ets:match_object(FileHandlesEts, {{'_', File}, '_'}, 1) of
+ {[_|_], _Cont} -> false;
+ _ -> ok = file:delete(
+ form_filename(Dir, filenum_to_name(File))),
+ true
+ end.
+
+-spec close_all_indicated
+ (client_msstate()) -> rabbit_types:ok(client_msstate()).
+
+%% Close every handle this client has been asked to relinquish (its
+%% entries marked 'close' in file_handles_ets), removing the markers.
+close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts,
+ client_ref = Ref } =
+ CState) ->
+ Objs = ets:match_object(FileHandlesEts, {{Ref, '_'}, close}),
+ {ok, lists:foldl(fun ({Key = {_Ref, File}, close}, CStateM) ->
+ true = ets:delete(FileHandlesEts, Key),
+ close_handle(File, CStateM)
+ end, CState, Objs)}.
+
+%% Close every cached read handle. The client variant also deletes
+%% this client's rows from the shared file_handles_ets table; the
+%% server variant has no such rows to clean up.
+close_all_handles(CState = #client_msstate { file_handles_ets = FileHandlesEts,
+ file_handle_cache = FHC,
+ client_ref = Ref }) ->
+ ok = maps:fold(fun (File, Hdl, ok) ->
+ true = ets:delete(FileHandlesEts, {Ref, File}),
+ file_handle_cache:close(Hdl)
+ end, ok, FHC),
+ CState #client_msstate { file_handle_cache = #{} };
+
+close_all_handles(State = #msstate { file_handle_cache = FHC }) ->
+ ok = maps:fold(fun (_Key, Hdl, ok) -> file_handle_cache:close(Hdl) end,
+ ok, FHC),
+ State #msstate { file_handle_cache = #{} }.
+
+%% Fetch (opening and caching on first use) a read handle for FileNum,
+%% from either a client's or the server's per-process handle cache.
+get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = FHC,
+ dir = Dir }) ->
+ {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir),
+ {Hdl, CState #client_msstate { file_handle_cache = FHC2 }};
+
+get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC,
+ dir = Dir }) ->
+ {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir),
+ {Hdl, State #msstate { file_handle_cache = FHC2 }}.
+
+get_read_handle(FileNum, FHC, Dir) ->
+ case maps:find(FileNum, FHC) of
+ {ok, Hdl} -> {Hdl, FHC};
+ error -> {ok, Hdl} = open_file(Dir, filenum_to_name(FileNum),
+ ?READ_MODE),
+ {Hdl, maps:put(FileNum, Hdl, FHC)}
+ end.
+
+%% Set the file's size to FileSizeLimit (seek + truncate) and then
+%% restore the position to FinalPos ready for the next append.
+preallocate(Hdl, FileSizeLimit, FinalPos) ->
+ {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit),
+ ok = file_handle_cache:truncate(Hdl),
+ {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos),
+ ok.
+
+%% Cut the file back to Lowpoint, then extend it out to Highpoint,
+%% leaving the position at Lowpoint.
+truncate_and_extend_file(Hdl, Lowpoint, Highpoint) ->
+ {ok, Lowpoint} = file_handle_cache:position(Hdl, Lowpoint),
+ ok = file_handle_cache:truncate(Hdl),
+ ok = preallocate(Hdl, Highpoint, Lowpoint).
+
+%% Path of Name inside Dir.
+form_filename(Dir, Name) ->
+    filename:join(Dir, Name).
+
+%% On-disk file name for a file number, e.g. 3 -> "3" ++ ?FILE_EXTENSION.
+filenum_to_name(FileNum) ->
+    integer_to_list(FileNum) ++ ?FILE_EXTENSION.
+
+%% Inverse of filenum_to_name/1: strip the extension, parse the number.
+filename_to_num(FileName) ->
+    list_to_integer(filename:rootname(FileName)).
+
+%% Names of all files in Dir with extension Ext, sorted by ascending
+%% file number.
+list_sorted_filenames(Dir, Ext) ->
+    ByFileNum = fun (A, B) -> filename_to_num(A) < filename_to_num(B) end,
+    lists:sort(ByFileNum, filelib:wildcard("*" ++ Ext, Dir)).
+
+%%----------------------------------------------------------------------------
+%% index
+%%----------------------------------------------------------------------------
+
+%% Thin dispatch layer over the pluggable index module (index_module /
+%% index_state live in the server, client and gc states alike).
+
+%% As index_lookup/2, but treats a zero ref_count entry as absent.
+index_lookup_positive_ref_count(Key, State) ->
+ case index_lookup(Key, State) of
+ not_found -> not_found;
+ #msg_location { ref_count = 0 } -> not_found;
+ #msg_location {} = MsgLocation -> MsgLocation
+ end.
+
+index_update_ref_count(Key, RefCount, State) ->
+ index_update_fields(Key, {#msg_location.ref_count, RefCount}, State).
+
+index_lookup(Key, #gc_state { index_module = Index,
+ index_state = State }) ->
+ Index:lookup(Key, State);
+
+index_lookup(Key, #client_msstate { index_module = Index,
+ index_state = State }) ->
+ Index:lookup(Key, State);
+
+index_lookup(Key, #msstate { index_module = Index, index_state = State }) ->
+ Index:lookup(Key, State).
+
+index_insert(Obj, #msstate { index_module = Index, index_state = State }) ->
+ Index:insert(Obj, State).
+
+index_update(Obj, #msstate { index_module = Index, index_state = State }) ->
+ Index:update(Obj, State).
+
+index_update_fields(Key, Updates, #msstate{ index_module = Index,
+ index_state = State }) ->
+ Index:update_fields(Key, Updates, State);
+index_update_fields(Key, Updates, #gc_state{ index_module = Index,
+ index_state = State }) ->
+ Index:update_fields(Key, Updates, State).
+
+index_delete(Key, #msstate { index_module = Index, index_state = State }) ->
+ Index:delete(Key, State).
+
+index_delete_object(Obj, #gc_state{ index_module = Index,
+ index_state = State }) ->
+ Index:delete_object(Obj, State).
+
+%% Drop entries created during refcounting that never gained a file
+%% (see count_msg_refs/3, which inserts with file = undefined).
+index_clean_up_temporary_reference_count_entries(
+ #msstate { index_module = Index,
+ index_state = State }) ->
+ Index:clean_up_temporary_reference_count_entries_without_file(State).
+
+%%----------------------------------------------------------------------------
+%% shutdown and recovery
+%%----------------------------------------------------------------------------
+
+%% Attempt a fast index recovery. Returns {Recovered, IndexState,
+%% ClientRefs}: recovery succeeds only if the file summary was
+%% recoverable, the recovery terms are readable, the stored client
+%% refs and index module match the current ones, and the index module
+%% itself recovers; otherwise fall back to a fresh, empty index.
+recover_index_and_client_refs(IndexModule, _Recover, undefined, Dir, _Name) ->
+ {false, IndexModule:new(Dir), []};
+recover_index_and_client_refs(IndexModule, false, _ClientRefs, Dir, Name) ->
+ rabbit_log:warning("Message store ~tp: rebuilding indices from scratch~n", [Name]),
+ {false, IndexModule:new(Dir), []};
+recover_index_and_client_refs(IndexModule, true, ClientRefs, Dir, Name) ->
+ Fresh = fun (ErrorMsg, ErrorArgs) ->
+ rabbit_log:warning("Message store ~tp : " ++ ErrorMsg ++ "~n"
+ "rebuilding indices from scratch~n",
+ [Name | ErrorArgs]),
+ {false, IndexModule:new(Dir), []}
+ end,
+ case read_recovery_terms(Dir) of
+ {false, Error} ->
+ Fresh("failed to read recovery terms: ~p", [Error]);
+ {true, Terms} ->
+ RecClientRefs = proplists:get_value(client_refs, Terms, []),
+ RecIndexModule = proplists:get_value(index_module, Terms),
+ case (lists:sort(ClientRefs) =:= lists:sort(RecClientRefs)
+ andalso IndexModule =:= RecIndexModule) of
+ true -> case IndexModule:recover(Dir) of
+ {ok, IndexState1} ->
+ {true, IndexState1, ClientRefs};
+ {error, Error} ->
+ Fresh("failed to recover index: ~p", [Error])
+ end;
+ false -> Fresh("recovery terms differ from present", [])
+ end
+ end.
+
+%% Persist the clean-shutdown marker (?CLEAN_FILENAME) on terminate.
+store_recovery_terms(Terms, Dir) ->
+ rabbit_file:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms).
+
+%% Read and then delete the clean-shutdown marker; deleting it ensures
+%% a subsequent crash cannot be mistaken for a clean shutdown.
+read_recovery_terms(Dir) ->
+ Path = filename:join(Dir, ?CLEAN_FILENAME),
+ case rabbit_file:read_term_file(Path) of
+ {ok, Terms} -> case file:delete(Path) of
+ ok -> {true, Terms};
+ {error, Error} -> {false, Error}
+ end;
+ {error, Error} -> {false, Error}
+ end.
+
+%% Persist the file summary ETS table to disk on shutdown.
+store_file_summary(Tid, Dir) ->
+ ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME),
+ [{extended_info, [object_count]}]).
+
+%% Load the persisted file summary, or create a fresh empty table.
+%% Returns {Recovered, Tid}; the saved file is deleted after a
+%% successful load so it cannot be reused after a crash.
+recover_file_summary(false, _Dir) ->
+ %% TODO: the only reason for this to be an *ordered*_set is so
+ %% that a) maybe_compact can start a traversal from the eldest
+ %% file, and b) build_index in fast recovery mode can easily
+ %% identify the current file. It's awkward to have both that
+ %% odering and the left/right pointers in the entries - replacing
+ %% the former with some additional bit of state would be easy, but
+ %% ditching the latter would be neater.
+ {false, ets:new(rabbit_msg_store_file_summary,
+ [ordered_set, public, {keypos, #file_summary.file}])};
+recover_file_summary(true, Dir) ->
+ Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME),
+ case ets:file2tab(Path) of
+ {ok, Tid} -> ok = file:delete(Path),
+ {true, Tid};
+ {error, _Error} -> recover_file_summary(false, Dir)
+ end.
+
+%% Fold over the queue-supplied generator of {MsgId, RefDelta, Next}
+%% tuples, accumulating reference counts in the index. Entries that
+%% reach zero are deleted; entries created here carry file = undefined
+%% until the on-disk scan locates them (leftovers are removed later by
+%% index_clean_up_temporary_reference_count_entries/1).
+count_msg_refs(Gen, Seed, State) ->
+ case Gen(Seed) of
+ finished ->
+ ok;
+ {_MsgId, 0, Next} ->
+ count_msg_refs(Gen, Next, State);
+ {MsgId, Delta, Next} ->
+ ok = case index_lookup(MsgId, State) of
+ not_found ->
+ index_insert(#msg_location { msg_id = MsgId,
+ file = undefined,
+ ref_count = Delta },
+ State);
+ #msg_location { ref_count = RefCount } = StoreEntry ->
+ NewRefCount = RefCount + Delta,
+ case NewRefCount of
+ 0 -> index_delete(MsgId, State);
+ _ -> index_update(StoreEntry #msg_location {
+ ref_count = NewRefCount },
+ State)
+ end
+ end,
+ count_msg_refs(Gen, Next, State)
+ end.
+
+%% Repair any compactions interrupted by a crash (leftover .tmp
+%% files). Returns true iff there were none - i.e. whether fast
+%% recovery of the file summary may be attempted.
+recover_crashed_compactions(Dir) ->
+ FileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION),
+ TmpFileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION_TMP),
+ lists:foreach(
+ fun (TmpFileName) ->
+ NonTmpRelatedFileName =
+ filename:rootname(TmpFileName) ++ ?FILE_EXTENSION,
+ true = lists:member(NonTmpRelatedFileName, FileNames),
+ ok = recover_crashed_compaction(
+ Dir, TmpFileName, NonTmpRelatedFileName)
+ end, TmpFileNames),
+ TmpFileNames == [].
+
+recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) ->
+ %% Because a msg can legitimately appear multiple times in the
+ %% same file, identifying the contents of the tmp file and where
+ %% they came from is non-trivial. If we are recovering a crashed
+ %% compaction then we will be rebuilding the index, which can cope
+ %% with duplicates appearing. Thus the simplest and safest thing
+ %% to do is to append the contents of the tmp file to its main
+ %% file.
+ {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_MODE),
+ {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName,
+ ?READ_MODE ++ ?WRITE_MODE),
+ {ok, _End} = file_handle_cache:position(MainHdl, eof),
+ Size = filelib:file_size(form_filename(Dir, TmpFileName)),
+ {ok, Size} = file_handle_cache:copy(TmpHdl, MainHdl, Size),
+ ok = file_handle_cache:close(MainHdl),
+ ok = file_handle_cache:delete(TmpHdl),
+ ok.
+
+%% Scan a segment file and return rabbit_msg_file:scan/4's result:
+%% {ok, Entries, Size} where Entries is accumulated by scan_fun/2
+%% (end-of-file entries at the head of the list). A missing file is
+%% treated as empty rather than an error.
+scan_file_for_valid_messages(File) ->
+ case open_file(File, ?READ_MODE) of
+ {ok, Hdl} -> Valid = rabbit_msg_file:scan(
+ Hdl, filelib:file_size(File),
+ fun scan_fun/2, []),
+ ok = file_handle_cache:close(Hdl),
+ Valid;
+ %% absent file == empty file
+ {error, enoent} -> {ok, [], 0};
+ {error, Reason} -> {error, {unable_to_scan_file,
+ filename:basename(File),
+ Reason}}
+ end.
+
+%% Convenience wrapper: scan FileName relative to Dir.
+scan_file_for_valid_messages(Dir, FileName) ->
+ scan_file_for_valid_messages(form_filename(Dir, FileName)).
+
+%% Accumulator for rabbit_msg_file:scan/4: keep id/size/offset,
+%% discard the message body itself.
+scan_fun({MsgId, TotalSize, Offset, _Msg}, Acc) ->
+ [{MsgId, TotalSize, Offset} | Acc].
+
+%% Takes the list in *ascending* order (i.e. eldest message
+%% first). This is the opposite of what scan_file_for_valid_messages
+%% produces. The list of msgs that is produced is youngest first.
+%%
+%% Returns {EndOffsetOfContiguousPrefix, MsgsAfterFirstGap}: walks the
+%% #msg_location list while each entry starts exactly where the
+%% previous one ended, and stops at the first hole.
+drop_contiguous_block_prefix(L) -> drop_contiguous_block_prefix(L, 0).
+
+drop_contiguous_block_prefix([], ExpectedOffset) ->
+ {ExpectedOffset, []};
+drop_contiguous_block_prefix([#msg_location { offset = ExpectedOffset,
+ total_size = TotalSize } | Tail],
+ ExpectedOffset) ->
+ %% entry is contiguous: advance the expected offset past it
+ ExpectedOffset1 = ExpectedOffset + TotalSize,
+ drop_contiguous_block_prefix(Tail, ExpectedOffset1);
+drop_contiguous_block_prefix(MsgsAfterGap, ExpectedOffset) ->
+ {ExpectedOffset, MsgsAfterGap}.
+
+%% Build the message index. First clause: clean recovery — the file
+%% summary ETS table was recovered intact, so just re-derive the
+%% valid/total size sums and current file from it; returns
+%% {OffsetOfLastFile, State}. Second clause: dirty recovery — rebuild
+%% refcounts from the queues via the supplied generator, then scan all
+%% segment files in parallel to reconstruct the index.
+build_index(true, _StartupFunState,
+ State = #msstate { file_summary_ets = FileSummaryEts }) ->
+ ets:foldl(
+ fun (#file_summary { valid_total_size = ValidTotalSize,
+ file_size = FileSize,
+ file = File },
+ {_Offset, State1 = #msstate { sum_valid_data = SumValid,
+ sum_file_size = SumFileSize }}) ->
+ {FileSize, State1 #msstate {
+ sum_valid_data = SumValid + ValidTotalSize,
+ sum_file_size = SumFileSize + FileSize,
+ current_file = File }}
+ end, {0, State}, FileSummaryEts);
+build_index(false, {MsgRefDeltaGen, MsgRefDeltaGenInit},
+ State = #msstate { dir = Dir }) ->
+ rabbit_log:debug("Rebuilding message refcount...~n", []),
+ ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State),
+ rabbit_log:debug("Done rebuilding message refcount~n", []),
+ {ok, Pid} = gatherer:start_link(),
+ case [filename_to_num(FileName) ||
+ FileName <- list_sorted_filenames(Dir, ?FILE_EXTENSION)] of
+ %% no segment files on disk: index just the current file
+ [] -> rebuild_index(Pid, [State #msstate.current_file],
+ State);
+ %% after rebuilding, drop any files that turned out empty
+ Files -> {Offset, State1} = rebuild_index(Pid, Files, State),
+ {Offset, lists:foldl(fun delete_file_if_empty/2,
+ State1, Files)}
+ end.
+
+%% Worker body for the parallel index rebuild: scan one segment file,
+%% claim each message id in the index the first time it is seen
+%% (file = undefined), and publish a #file_summary for the file into
+%% the Gatherer. Left is the previous file number (or undefined);
+%% Files is the list of files still to be processed after this one.
+build_index_worker(Gatherer, State = #msstate { dir = Dir },
+ Left, File, Files) ->
+ FileName = filenum_to_name(File),
+ rabbit_log:debug("Rebuilding message location index from ~p (~B file(s) remaining)~n",
+ [form_filename(Dir, FileName), length(Files)]),
+ {ok, Messages, FileSize} =
+ scan_file_for_valid_messages(Dir, FileName),
+ {ValidMessages, ValidTotalSize} =
+ lists:foldl(
+ fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) ->
+ case index_lookup(MsgId, State) of
+ %% entry exists from refcounting but has no
+ %% location yet: this occurrence wins
+ #msg_location { file = undefined } = StoreEntry ->
+ ok = index_update(StoreEntry #msg_location {
+ file = File, offset = Offset,
+ total_size = TotalSize },
+ State),
+ {[Obj | VMAcc], VTSAcc + TotalSize};
+ %% unreferenced, or already located elsewhere
+ _ ->
+ {VMAcc, VTSAcc}
+ end
+ end, {[], 0}, Messages),
+ {Right, FileSize1} =
+ case Files of
+ %% if it's the last file, we'll truncate to remove any
+ %% rubbish above the last valid message. This affects the
+ %% file size.
+ [] -> {undefined, case ValidMessages of
+ [] -> 0;
+ _ -> {_MsgId, TotalSize, Offset} =
+ lists:last(ValidMessages),
+ Offset + TotalSize
+ end};
+ [F|_] -> {F, FileSize}
+ end,
+ ok = gatherer:in(Gatherer, #file_summary {
+ file = File,
+ valid_total_size = ValidTotalSize,
+ left = Left,
+ right = Right,
+ file_size = FileSize1,
+ locked = false,
+ readers = 0 }),
+ %% matches the fork/1 done per-file in rebuild_index/3
+ ok = gatherer:finish(Gatherer).
+
+%% Submit one build_index_worker job per file to the worker pool,
+%% threading the previous file number through as Left. Runs inside a
+%% dedicated process spawned by rebuild_index/3, so exit(normal) on
+%% the empty list terminates that enqueuer process, not the caller.
+enqueue_build_index_workers(_Gatherer, _Left, [], _State) ->
+ exit(normal);
+enqueue_build_index_workers(Gatherer, Left, [File|Files], State) ->
+ ok = worker_pool:dispatch_sync(
+ fun () ->
+ %% link so a crashing worker takes the gatherer
+ %% down rather than leaving it waiting forever
+ link(Gatherer),
+ ok = build_index_worker(Gatherer, State,
+ Left, File, Files),
+ unlink(Gatherer),
+ ok
+ end),
+ enqueue_build_index_workers(Gatherer, File, Files, State).
+
+%% Drain #file_summary results from the Gatherer into the summary ETS
+%% table, accumulating the valid-data and file-size sums. When the
+%% gatherer is empty (all workers finished), clean up index entries
+%% that never got a file, and return {OffsetInLastFile, State}.
+reduce_index(Gatherer, LastFile,
+ State = #msstate { file_summary_ets = FileSummaryEts,
+ sum_valid_data = SumValid,
+ sum_file_size = SumFileSize }) ->
+ case gatherer:out(Gatherer) of
+ empty ->
+ ok = gatherer:stop(Gatherer),
+ ok = index_clean_up_temporary_reference_count_entries(State),
+ %% LastFile may have produced no summary (e.g. absent on
+ %% disk), in which case resume writing at offset 0
+ Offset = case ets:lookup(FileSummaryEts, LastFile) of
+ [] -> 0;
+ [#file_summary { file_size = FileSize }] -> FileSize
+ end,
+ {Offset, State #msstate { current_file = LastFile }};
+ {value, #file_summary { valid_total_size = ValidTotalSize,
+ file_size = FileSize } = FileSummary} ->
+ %% ASSERTION: each file is summarised exactly once
+ true = ets:insert_new(FileSummaryEts, FileSummary),
+ reduce_index(Gatherer, LastFile,
+ State #msstate {
+ sum_valid_data = SumValid + ValidTotalSize,
+ sum_file_size = SumFileSize + FileSize })
+ end.
+
+%% Fan-out/fan-in driver for the index rebuild: fork the gatherer once
+%% per file, spawn an enqueuer process that feeds the worker pool, and
+%% reduce the results on the calling process. The last file in Files
+%% becomes the store's current file.
+rebuild_index(Gatherer, Files, State) ->
+ lists:foreach(fun (_File) ->
+ ok = gatherer:fork(Gatherer)
+ end, Files),
+ Pid = spawn(
+ fun () ->
+ enqueue_build_index_workers(Gatherer, undefined,
+ Files, State)
+ end),
+ %% monitor rather than link: the enqueuer exits normal when done
+ erlang:monitor(process, Pid),
+ reduce_index(Gatherer, lists:last(Files), State).
+
+%%----------------------------------------------------------------------------
+%% garbage collection / compaction / aggregation -- internal
+%%----------------------------------------------------------------------------
+
+%% If the current write file has reached the size limit, sync and
+%% close it, open the next numbered file, wire the new file into the
+%% doubly-linked file_summary chain, drop fully-dereferenced entries
+%% from the current-file cache, and consider compaction. Otherwise a
+%% no-op.
+maybe_roll_to_new_file(
+ Offset,
+ State = #msstate { dir = Dir,
+ current_file_handle = CurHdl,
+ current_file = CurFile,
+ file_summary_ets = FileSummaryEts,
+ cur_file_cache_ets = CurFileCacheEts,
+ file_size_limit = FileSizeLimit })
+ when Offset >= FileSizeLimit ->
+ State1 = internal_sync(State),
+ ok = file_handle_cache:close(CurHdl),
+ NextFile = CurFile + 1,
+ {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE),
+ %% ASSERTION: the next file number must not exist yet
+ true = ets:insert_new(FileSummaryEts, #file_summary {
+ file = NextFile,
+ valid_total_size = 0,
+ left = CurFile,
+ right = undefined,
+ file_size = 0,
+ locked = false,
+ readers = 0 }),
+ true = ets:update_element(FileSummaryEts, CurFile,
+ {#file_summary.right, NextFile}),
+ %% evict cache entries whose extra refcount has dropped to 0
+ true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}),
+ maybe_compact(State1 #msstate { current_file_handle = NextHdl,
+ current_file = NextFile });
+maybe_roll_to_new_file(_, State) ->
+ State.
+
+%% Kick off a background combine of two adjacent files when the store
+%% is large enough (> 2 file-size limits) and fragmented enough
+%% (garbage fraction above ?GARBAGE_FRACTION), unless too many GCs
+%% are already pending. Locks both files before handing them to the
+%% GC process so writers/readers treat them as off-limits.
+maybe_compact(State = #msstate { sum_valid_data = SumValid,
+ sum_file_size = SumFileSize,
+ gc_pid = GCPid,
+ pending_gc_completion = Pending,
+ file_summary_ets = FileSummaryEts,
+ file_size_limit = FileSizeLimit })
+ when SumFileSize > 2 * FileSizeLimit andalso
+ (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION ->
+ %% TODO: the algorithm here is sub-optimal - it may result in a
+ %% complete traversal of FileSummaryEts.
+ First = ets:first(FileSummaryEts),
+ case First =:= '$end_of_table' orelse
+ maps:size(Pending) >= ?MAXIMUM_SIMULTANEOUS_GC_FILES of
+ true ->
+ State;
+ false ->
+ case find_files_to_combine(FileSummaryEts, FileSizeLimit,
+ ets:lookup(FileSummaryEts, First)) of
+ not_found ->
+ State;
+ {Src, Dst} ->
+ %% record both files as awaiting GC completion
+ Pending1 = maps_store(Dst, [],
+ maps_store(Src, [], Pending)),
+ %% close our own cached handles before locking
+ State1 = close_handle(Src, close_handle(Dst, State)),
+ true = ets:update_element(FileSummaryEts, Src,
+ {#file_summary.locked, true}),
+ true = ets:update_element(FileSummaryEts, Dst,
+ {#file_summary.locked, true}),
+ ok = rabbit_msg_store_gc:combine(GCPid, Src, Dst),
+ State1 #msstate { pending_gc_completion = Pending1 }
+ end
+ end;
+maybe_compact(State) ->
+ State.
+
+%% Walk the file chain rightwards looking for an adjacent pair whose
+%% combined valid data fits in one file. Returns {Src, Dst} (Src is
+%% merged into Dst, its left neighbour) or not_found. Pairs where
+%% either file is empty, locked, or where Src is the right-most file
+%% (the current write file) are skipped.
+find_files_to_combine(FileSummaryEts, FileSizeLimit,
+ [#file_summary { file = Dst,
+ valid_total_size = DstValid,
+ right = Src,
+ locked = DstLocked }]) ->
+ case Src of
+ undefined ->
+ not_found;
+ _ ->
+ [#file_summary { file = Src,
+ valid_total_size = SrcValid,
+ left = Dst,
+ right = SrcRight,
+ locked = SrcLocked }] = Next =
+ ets:lookup(FileSummaryEts, Src),
+ case SrcRight of
+ %% Src is the right-most (current) file: never combine it
+ undefined -> not_found;
+ _ -> case (DstValid + SrcValid =< FileSizeLimit) andalso
+ (DstValid > 0) andalso (SrcValid > 0) andalso
+ not (DstLocked orelse SrcLocked) of
+ true -> {Src, Dst};
+ %% no luck: slide the window one file right
+ false -> find_files_to_combine(
+ FileSummaryEts, FileSizeLimit, Next)
+ end
+ end
+ end.
+
+%% If File holds no valid data, lock it and ask the GC process to
+%% delete it, tracking it as pending. The current write file is never
+%% deleted (first clause).
+delete_file_if_empty(File, State = #msstate { current_file = File }) ->
+ State;
+delete_file_if_empty(File, State = #msstate {
+ gc_pid = GCPid,
+ file_summary_ets = FileSummaryEts,
+ pending_gc_completion = Pending }) ->
+ %% ASSERTION (via match): the file must exist and be unlocked
+ [#file_summary { valid_total_size = ValidData,
+ locked = false }] =
+ ets:lookup(FileSummaryEts, File),
+ case ValidData of
+ %% don't delete the file_summary_ets entry for File here
+ %% because we could have readers which need to be able to
+ %% decrement the readers count.
+ 0 -> true = ets:update_element(FileSummaryEts, File,
+ {#file_summary.locked, true}),
+ ok = rabbit_msg_store_gc:delete(GCPid, File),
+ Pending1 = maps_store(File, [], Pending),
+ close_handle(File,
+ State #msstate { pending_gc_completion = Pending1 });
+ _ -> State
+ end.
+
+%% After the GC has deleted File: force clients with cached handles to
+%% it to close them, splice File out of the doubly-linked left/right
+%% chain in the summary table, and finally remove its summary entry.
+cleanup_after_file_deletion(File,
+ #msstate { file_handles_ets = FileHandlesEts,
+ file_summary_ets = FileSummaryEts,
+ clients = Clients }) ->
+ %% Ensure that any clients that have open fhs to the file close
+ %% them before using them again. This has to be done here (given
+ %% it's done in the msg_store, and not the gc), and not when
+ %% starting up the GC, because if done when starting up the GC,
+ %% the client could find the close, and close and reopen the fh,
+ %% whilst the GC is waiting for readers to disappear, before it's
+ %% actually done the GC.
+ true = mark_handle_to_close(Clients, FileHandlesEts, File, true),
+ %% ASSERTION (via match): deleted file was locked with no readers
+ [#file_summary { left = Left,
+ right = Right,
+ locked = true,
+ readers = 0 }] = ets:lookup(FileSummaryEts, File),
+ %% We'll never delete the current file, so right is never undefined
+ true = Right =/= undefined, %% ASSERTION
+ true = ets:update_element(FileSummaryEts, Right,
+ {#file_summary.left, Left}),
+ %% ensure the double linked list is maintained
+ true = case Left of
+ undefined -> true; %% File is the eldest file (left-most)
+ _ -> ets:update_element(FileSummaryEts, Left,
+ {#file_summary.right, Right})
+ end,
+ true = ets:delete(FileSummaryEts, File),
+ ok.
+
+%%----------------------------------------------------------------------------
+%% garbage collection / compaction / aggregation -- external
+%%----------------------------------------------------------------------------
+
+-spec combine_files(non_neg_integer(), non_neg_integer(), gc_state()) ->
+ {ok, deletion_thunk()} | {defer, [non_neg_integer()]}.
+
+%% GC entry point (runs in the rabbit_msg_store_gc process): merge
+%% Source into Destination. Both must already be locked; if either
+%% still has readers, defer and return the file numbers to wait on.
+combine_files(Source, Destination,
+ State = #gc_state { file_summary_ets = FileSummaryEts }) ->
+ %% ASSERTION (via match): both files must be locked by now
+ [#file_summary{locked = true} = SourceSummary] =
+ ets:lookup(FileSummaryEts, Source),
+
+ [#file_summary{locked = true} = DestinationSummary] =
+ ets:lookup(FileSummaryEts, Destination),
+
+ case {SourceSummary, DestinationSummary} of
+ {#file_summary{readers = 0}, #file_summary{readers = 0}} ->
+ {ok, do_combine_files(SourceSummary, DestinationSummary,
+ Source, Destination, State)};
+ _ ->
+ rabbit_log:debug("Asked to combine files ~p and ~p but they have active readers. Deferring.",
+ [Source, Destination]),
+ DeferredFiles = [FileSummary#file_summary.file
+ || FileSummary <- [SourceSummary, DestinationSummary],
+ FileSummary#file_summary.readers /= 0],
+ {defer, DeferredFiles}
+ end.
+
+%% Perform the actual merge of Source into Destination: compact
+%% Destination in place (via a tmp file if its valid data is not a
+%% contiguous prefix), append Source's valid messages, update the
+%% index and summary, notify the msg_store server, and return a thunk
+%% that deletes Source once all handles to it are gone.
+do_combine_files(SourceSummary, DestinationSummary,
+ Source, Destination,
+ State = #gc_state { file_summary_ets = FileSummaryEts,
+ file_handles_ets = FileHandlesEts,
+ dir = Dir,
+ msg_store = Server }) ->
+ %% ASSERTIONS (via match): adjacency, lock and reader state
+ #file_summary {
+ readers = 0,
+ left = Destination,
+ valid_total_size = SourceValid,
+ file_size = SourceFileSize,
+ locked = true } = SourceSummary,
+ #file_summary {
+ readers = 0,
+ right = Source,
+ valid_total_size = DestinationValid,
+ file_size = DestinationFileSize,
+ locked = true } = DestinationSummary,
+
+ SourceName = filenum_to_name(Source),
+ DestinationName = filenum_to_name(Destination),
+ {ok, SourceHdl} = open_file(Dir, SourceName,
+ ?READ_AHEAD_MODE),
+ {ok, DestinationHdl} = open_file(Dir, DestinationName,
+ ?READ_AHEAD_MODE ++ ?WRITE_MODE),
+ TotalValidData = SourceValid + DestinationValid,
+ %% if DestinationValid =:= DestinationContiguousTop then we don't
+ %% need a tmp file
+ %% if they're not equal, then we need to write out everything past
+ %% the DestinationContiguousTop to a tmp file then truncate,
+ %% copy back in, and then copy over from Source
+ %% otherwise we just truncate straight away and copy over from Source
+ %% NB: matching DestinationValid (already bound above) asserts the
+ %% summary agrees with what the file scan finds
+ {DestinationWorkList, DestinationValid} =
+ load_and_vacuum_message_file(Destination, State),
+ {DestinationContiguousTop, DestinationWorkListTail} =
+ drop_contiguous_block_prefix(DestinationWorkList),
+ case DestinationWorkListTail of
+ [] -> ok = truncate_and_extend_file(
+ DestinationHdl, DestinationContiguousTop, TotalValidData),
+ _ -> Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP,
+ {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE++?WRITE_MODE),
+ ok = copy_messages(
+ DestinationWorkListTail, DestinationContiguousTop,
+ DestinationValid, DestinationHdl, TmpHdl, Destination,
+ State),
+ TmpSize = DestinationValid - DestinationContiguousTop,
+ %% so now Tmp contains everything we need to salvage
+ %% from Destination, and index_state has been updated to
+ %% reflect the compaction of Destination so truncate
+ %% Destination and copy from Tmp back to the end
+ {ok, 0} = file_handle_cache:position(TmpHdl, 0),
+ ok = truncate_and_extend_file(
+ DestinationHdl, DestinationContiguousTop, TotalValidData),
+ {ok, TmpSize} =
+ file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize),
+ %% position in DestinationHdl should now be DestinationValid
+ ok = file_handle_cache:sync(DestinationHdl),
+ ok = file_handle_cache:delete(TmpHdl)
+ end,
+ %% same assertion trick: SourceValid is already bound above
+ {SourceWorkList, SourceValid} = load_and_vacuum_message_file(Source, State),
+ ok = copy_messages(SourceWorkList, DestinationValid, TotalValidData,
+ SourceHdl, DestinationHdl, Destination, State),
+ %% tidy up
+ ok = file_handle_cache:close(DestinationHdl),
+ ok = file_handle_cache:close(SourceHdl),
+
+ %% don't update dest.right, because it could be changing at the
+ %% same time
+ true = ets:update_element(
+ FileSummaryEts, Destination,
+ [{#file_summary.valid_total_size, TotalValidData},
+ {#file_summary.file_size, TotalValidData}]),
+
+ Reclaimed = SourceFileSize + DestinationFileSize - TotalValidData,
+ rabbit_log:debug("Combined segment files number ~p (source) and ~p (destination), reclaimed ~p bytes",
+ [Source, Destination, Reclaimed]),
+ gen_server2:cast(Server, {combine_files, Source, Destination, Reclaimed}),
+ safe_file_delete_fun(Source, Dir, FileHandlesEts).
+
+-spec delete_file(non_neg_integer(), gc_state()) -> {ok, deletion_thunk()} | {defer, [non_neg_integer()]}.
+
+%% GC entry point: delete a file known to contain no valid data.
+%% Defers if there are still active readers on the file.
+delete_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
+ file_handles_ets = FileHandlesEts,
+ dir = Dir,
+ msg_store = Server }) ->
+ case ets:lookup(FileSummaryEts, File) of
+ [#file_summary { valid_total_size = 0,
+ locked = true,
+ file_size = FileSize,
+ readers = 0 }] ->
+ %% ASSERTION: scanning must find no referenced messages
+ {[], 0} = load_and_vacuum_message_file(File, State),
+ gen_server2:cast(Server, {delete_file, File, FileSize}),
+ {ok, safe_file_delete_fun(File, Dir, FileHandlesEts)};
+ [#file_summary{readers = Readers}] when Readers > 0 ->
+ rabbit_log:debug("Asked to delete file ~p but it has active readers. Deferring.",
+ [File]),
+ {defer, [File]}
+ end.
+
+%% Scan File and cross-check each message against the index. Entries
+%% with ref_count 0 are vacuumed (removed from the index); live
+%% entries located in this file are collected. Returns
+%% {WorkListAscendingByOffset, TotalValidSize}.
+load_and_vacuum_message_file(File, State = #gc_state { dir = Dir }) ->
+ %% Messages here will be end-of-file at start-of-list
+ {ok, Messages, _FileSize} =
+ scan_file_for_valid_messages(Dir, filenum_to_name(File)),
+ %% foldl will reverse so will end up with msgs in ascending offset order
+ lists:foldl(
+ fun ({MsgId, TotalSize, Offset}, Acc = {List, Size}) ->
+ case index_lookup(MsgId, State) of
+ %% dead entry: drop it from the index now
+ #msg_location { file = File, total_size = TotalSize,
+ offset = Offset, ref_count = 0 } = Entry ->
+ ok = index_delete_object(Entry, State),
+ Acc;
+ %% live entry belonging to this file occurrence
+ #msg_location { file = File, total_size = TotalSize,
+ offset = Offset } = Entry ->
+ {[ Entry | List ], TotalSize + Size};
+ %% not indexed, or a duplicate located elsewhere
+ _ ->
+ Acc
+ end
+ end, {[], 0}, Messages).
+
+%% Copy the messages in WorkList (ascending source offsets) from
+%% SourceHdl into DestinationHdl starting at InitOffset, coalescing
+%% runs of adjacent messages into single bulk copies, and repoint each
+%% message's index entry at Destination/new offset. The fold must end
+%% exactly at FinalOffset; otherwise a gc_error term is returned.
+copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl,
+ Destination, State) ->
+ Copy = fun ({BlockStart, BlockEnd}) ->
+ BSize = BlockEnd - BlockStart,
+ {ok, BlockStart} =
+ file_handle_cache:position(SourceHdl, BlockStart),
+ {ok, BSize} =
+ file_handle_cache:copy(SourceHdl, DestinationHdl, BSize)
+ end,
+ case
+ lists:foldl(
+ fun (#msg_location { msg_id = MsgId, offset = Offset,
+ total_size = TotalSize },
+ {CurOffset, Block = {BlockStart, BlockEnd}}) ->
+ %% CurOffset is in the DestinationFile.
+ %% Offset, BlockStart and BlockEnd are in the SourceFile
+ %% update MsgLocation to reflect change of file and offset
+ ok = index_update_fields(MsgId,
+ [{#msg_location.file, Destination},
+ {#msg_location.offset, CurOffset}],
+ State),
+ {CurOffset + TotalSize,
+ case BlockEnd of
+ undefined ->
+ %% base case, called only for the first list elem
+ {Offset, Offset + TotalSize};
+ Offset ->
+ %% extend the current block because the
+ %% next msg follows straight on
+ {BlockStart, BlockEnd + TotalSize};
+ _ ->
+ %% found a gap, so actually do the work for
+ %% the previous block
+ Copy(Block),
+ {Offset, Offset + TotalSize}
+ end}
+ end, {InitOffset, {undefined, undefined}}, WorkList) of
+ {FinalOffset, Block} ->
+ case WorkList of
+ [] -> ok;
+ _ -> Copy(Block), %% do the last remaining block
+ ok = file_handle_cache:sync(DestinationHdl)
+ end;
+ {FinalOffsetZ, _Block} ->
+ %% offsets didn't add up: report instead of corrupting
+ {gc_error, [{expected, FinalOffset},
+ {got, FinalOffsetZ},
+ {destination, Destination}]}
+ end.
+
+-spec force_recovery(file:filename(), server()) -> 'ok'.
+
+%% Force a full (dirty) recovery of Store on next start: remove the
+%% clean-shutdown marker from the store's directory and repair any
+%% crashed compactions there.
+force_recovery(BaseDir, Store) ->
+ Dir = filename:join(BaseDir, atom_to_list(Store)),
+ case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of
+ ok -> ok;
+ {error, enoent} -> ok
+ end,
+ %% Scan the store's own directory (Dir), not BaseDir: the
+ %% ?FILE_EXTENSION segment and tmp files live under Dir, so
+ %% passing BaseDir would make crashed-compaction recovery a no-op.
+ recover_crashed_compactions(Dir),
+ ok.
+
+%% Apply Fun to each file name joined onto directory D; every call
+%% must return ok (asserted by the match).
+foreach_file(D, Fun, Files) ->
+ [ok = Fun(filename:join(D, File)) || File <- Files].
+
+%% Apply Fun to each (D1/File, D2/File) pair; every call must return ok.
+foreach_file(D1, D2, Fun, Files) ->
+ [ok = Fun(filename:join(D1, File), filename:join(D2, File)) || File <- Files].
+
+-spec transform_dir(file:filename(), server(),
+ fun ((any()) -> (rabbit_types:ok_or_error2(msg(), any())))) -> 'ok'.
+
+%% Rewrite every segment file of Store through TransformFun, staging
+%% results in a ?TRANSFORM_TMP subdirectory and copying them back. A
+%% pre-existing tmp dir means a previous transform died part-way, so
+%% abort rather than compound the damage.
+transform_dir(BaseDir, Store, TransformFun) ->
+ Dir = filename:join(BaseDir, atom_to_list(Store)),
+ TmpDir = filename:join(Dir, ?TRANSFORM_TMP),
+ TransformFile = fun (A, B) -> transform_msg_file(A, B, TransformFun) end,
+ CopyFile = fun (Src, Dst) -> {ok, _Bytes} = file:copy(Src, Dst), ok end,
+ case filelib:is_dir(TmpDir) of
+ true -> throw({error, transform_failed_previously});
+ false -> FileList = list_sorted_filenames(Dir, ?FILE_EXTENSION),
+ foreach_file(Dir, TmpDir, TransformFile, FileList),
+ foreach_file(Dir, fun file:delete/1, FileList),
+ foreach_file(TmpDir, Dir, CopyFile, FileList),
+ foreach_file(TmpDir, fun file:delete/1, FileList),
+ ok = file:del_dir(TmpDir)
+ end.
+
+%% Stream every message in FileOld through TransformFun and append the
+%% results to FileNew, preserving message ids. The empty binary is the
+%% dying-client marker and is passed through untransformed.
+transform_msg_file(FileOld, FileNew, TransformFun) ->
+ ok = rabbit_file:ensure_parent_dirs_exist(FileNew),
+ {ok, RefOld} = file_handle_cache:open_with_absolute_path(
+ FileOld, [raw, binary, read], []),
+ {ok, RefNew} = file_handle_cache:open_with_absolute_path(
+ FileNew, [raw, binary, write],
+ [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]),
+ {ok, _Acc, _IgnoreSize} =
+ rabbit_msg_file:scan(
+ RefOld, filelib:file_size(FileOld),
+ fun({MsgId, _Size, _Offset, BinMsg}, ok) ->
+ {ok, MsgNew} = case binary_to_term(BinMsg) of
+ <<>> -> {ok, <<>>}; %% dying client marker
+ Msg -> TransformFun(Msg)
+ end,
+ {ok, _} = rabbit_msg_file:append(RefNew, MsgId, MsgNew),
+ ok
+ end, ok),
+ ok = file_handle_cache:close(RefOld),
+ ok = file_handle_cache:close(RefNew),
+ ok.
diff --git a/deps/rabbit/src/rabbit_msg_store_ets_index.erl b/deps/rabbit/src/rabbit_msg_store_ets_index.erl
new file mode 100644
index 0000000000..294417b5ba
--- /dev/null
+++ b/deps/rabbit/src/rabbit_msg_store_ets_index.erl
@@ -0,0 +1,76 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% ETS-backed implementation of the rabbit_msg_store_index behaviour:
+%% a public set table of #msg_location records keyed on msg_id, saved
+%% to / restored from ?FILENAME across clean shutdowns.
+-module(rabbit_msg_store_ets_index).
+
+-include("rabbit_msg_store.hrl").
+
+-behaviour(rabbit_msg_store_index).
+
+-export([new/1, recover/1,
+ lookup/2, insert/2, update/2, update_fields/3, delete/2,
+ delete_object/2, clean_up_temporary_reference_count_entries_without_file/1, terminate/1]).
+
+-define(MSG_LOC_NAME, rabbit_msg_store_ets_index).
+-define(FILENAME, "msg_store_index.ets").
+
+-record(state, { table, dir }).
+
+%% Fresh index: discard any stale saved table file and create an
+%% empty table.
+new(Dir) ->
+ file:delete(filename:join(Dir, ?FILENAME)),
+ Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]),
+ #state { table = Tid, dir = Dir }.
+
+%% Clean-shutdown recovery: reload the saved table; the file is
+%% deleted after a successful load so a later crash forces a rebuild.
+recover(Dir) ->
+ Path = filename:join(Dir, ?FILENAME),
+ case ets:file2tab(Path) of
+ {ok, Tid} -> file:delete(Path),
+ {ok, #state { table = Tid, dir = Dir }};
+ Error -> Error
+ end.
+
+%% Return the #msg_location for Key, or not_found.
+lookup(Key, State) ->
+ case ets:lookup(State #state.table, Key) of
+ [] -> not_found;
+ [Entry] -> Entry
+ end.
+
+%% Insert a new entry; asserts the key was not already present.
+insert(Obj, State) ->
+ true = ets:insert_new(State #state.table, Obj),
+ ok.
+
+%% Overwrite (or create) the entry for Obj's key.
+update(Obj, State) ->
+ true = ets:insert(State #state.table, Obj),
+ ok.
+
+%% Update specific record fields in place; asserts the key exists.
+update_fields(Key, Updates, State) ->
+ true = ets:update_element(State #state.table, Key, Updates),
+ ok.
+
+delete(Key, State) ->
+ true = ets:delete(State #state.table, Key),
+ ok.
+
+%% Delete only if the stored object matches Obj exactly.
+delete_object(Obj, State) ->
+ true = ets:delete_object(State #state.table, Obj),
+ ok.
+
+%% Drop refcount placeholder entries that never acquired a file
+%% (file = undefined) after an index rebuild.
+clean_up_temporary_reference_count_entries_without_file(State) ->
+ MatchHead = #msg_location { file = undefined, _ = '_' },
+ ets:select_delete(State #state.table, [{MatchHead, [], [true]}]),
+ ok.
+
+%% Persist the table for clean-shutdown recovery (a failure is only
+%% logged: the index can be rebuilt), then delete the in-memory table.
+terminate(#state { table = MsgLocations, dir = Dir }) ->
+ case ets:tab2file(MsgLocations, filename:join(Dir, ?FILENAME),
+ [{extended_info, [object_count]}]) of
+ ok -> ok;
+ {error, Err} ->
+ rabbit_log:error("Unable to save message store index"
+ " for directory ~p.~nError: ~p~n",
+ [Dir, Err])
+ end,
+ ets:delete(MsgLocations).
diff --git a/deps/rabbit/src/rabbit_msg_store_gc.erl b/deps/rabbit/src/rabbit_msg_store_gc.erl
new file mode 100644
index 0000000000..41addc5fa6
--- /dev/null
+++ b/deps/rabbit/src/rabbit_msg_store_gc.erl
@@ -0,0 +1,125 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Garbage-collection worker for the message store: a gen_server2 that
+%% performs file combine/delete actions on behalf of the store,
+%% deferring any action whose target files still have active readers
+%% until the store signals no_readers for them.
+-module(rabbit_msg_store_gc).
+
+-behaviour(gen_server2).
+
+-export([start_link/1, combine/3, delete/2, no_readers/2, stop/1]).
+
+-export([set_maximum_since_use/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3, prioritise_cast/3]).
+
+-record(state,
+ { pending_no_readers,
+ on_action,
+ msg_store_state
+ }).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(rabbit_msg_store:gc_state()) ->
+ rabbit_types:ok_pid_or_error().
+
+start_link(MsgStoreState) ->
+ gen_server2:start_link(?MODULE, [MsgStoreState],
+ [{timeout, infinity}]).
+
+-spec combine(pid(), rabbit_msg_store:file_num(),
+ rabbit_msg_store:file_num()) -> 'ok'.
+
+%% Async request to merge Source into Destination.
+combine(Server, Source, Destination) ->
+ gen_server2:cast(Server, {combine, Source, Destination}).
+
+-spec delete(pid(), rabbit_msg_store:file_num()) -> 'ok'.
+
+%% Async request to delete an empty file.
+delete(Server, File) ->
+ gen_server2:cast(Server, {delete, File}).
+
+-spec no_readers(pid(), rabbit_msg_store:file_num()) -> 'ok'.
+
+%% Notification that File has no remaining readers, allowing a
+%% previously deferred action on it to be retried.
+no_readers(Server, File) ->
+ gen_server2:cast(Server, {no_readers, File}).
+
+-spec stop(pid()) -> 'ok'.
+
+stop(Server) ->
+ gen_server2:call(Server, stop, infinity).
+
+-spec set_maximum_since_use(pid(), non_neg_integer()) -> 'ok'.
+
+%% file_handle_cache callback: ask this process to age out idle fds.
+set_maximum_since_use(Pid, Age) ->
+ gen_server2:cast(Pid, {set_maximum_since_use, Age}).
+
+%%----------------------------------------------------------------------------
+
+init([MsgStoreState]) ->
+ ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use,
+ [self()]),
+ {ok, #state { pending_no_readers = #{},
+ on_action = [],
+ msg_store_state = MsgStoreState }, hibernate,
+ {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+%% fd-ageing requests jump ahead of queued GC work
+prioritise_cast({set_maximum_since_use, _Age}, _Len, _State) -> 8;
+prioritise_cast(_Msg, _Len, _State) -> 0.
+
+handle_call(stop, _From, State) ->
+ {stop, normal, ok, State}.
+
+handle_cast({combine, Source, Destination}, State) ->
+ {noreply, attempt_action(combine, [Source, Destination], State), hibernate};
+
+handle_cast({delete, File}, State) ->
+ {noreply, attempt_action(delete, [File], State), hibernate};
+
+%% retry the action that was deferred on File, if any
+handle_cast({no_readers, File},
+ State = #state { pending_no_readers = Pending }) ->
+ {noreply, case maps:find(File, Pending) of
+ error ->
+ State;
+ {ok, {Action, Files}} ->
+ Pending1 = maps:remove(File, Pending),
+ attempt_action(
+ Action, Files,
+ State #state { pending_no_readers = Pending1 })
+ end, hibernate};
+
+handle_cast({set_maximum_since_use, Age}, State) ->
+ ok = file_handle_cache:set_maximum_since_use(Age),
+ {noreply, State, hibernate}.
+
+%% no raw messages are expected: crash loudly on any
+handle_info(Info, State) ->
+ {stop, {unhandled_info, Info}, State}.
+
+%% NOTE(review): returns State where 'ok' is conventional; gen_server2
+%% ignores terminate/2's return value, so this is harmless — confirm.
+terminate(_Reason, State) ->
+ State.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%% Run Action now; on {defer, Files} park it keyed on the first file
+%% until no_readers arrives. On success, run and prune the retained
+%% deletion thunks, keeping only those that report not-yet-done.
+attempt_action(Action, Files,
+ State = #state { pending_no_readers = Pending,
+ on_action = Thunks,
+ msg_store_state = MsgStoreState }) ->
+ case do_action(Action, Files, MsgStoreState) of
+ {ok, OkThunk} ->
+ State#state{on_action = lists:filter(fun (Thunk) -> not Thunk() end,
+ [OkThunk | Thunks])};
+ {defer, [File | _]} ->
+ Pending1 = maps:put(File, {Action, Files}, Pending),
+ State #state { pending_no_readers = Pending1 }
+ end.
+
+do_action(combine, [Source, Destination], MsgStoreState) ->
+ rabbit_msg_store:combine_files(Source, Destination, MsgStoreState);
+do_action(delete, [File], MsgStoreState) ->
+ rabbit_msg_store:delete_file(File, MsgStoreState).
diff --git a/deps/rabbit/src/rabbit_networking.erl b/deps/rabbit/src/rabbit_networking.erl
new file mode 100644
index 0000000000..433b1d7540
--- /dev/null
+++ b/deps/rabbit/src/rabbit_networking.erl
@@ -0,0 +1,663 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_networking).
+
+%% This module contains various functions that deal with networking,
+%% TCP and TLS listeners, and connection information.
+%%
+%% It also contains a boot step — boot/0 — that starts networking machinery.
+%% This module primarily covers AMQP 0-9-1 but some bits are reused in
+%% plugins that provide protocol support, e.g. STOMP or MQTT.
+%%
+%% Functions in this module take care of normalising TCP listener options,
+%% including dual IP stack cases, and starting the AMQP 0-9-1 listener(s).
+%%
+%% See also tcp_listener_sup and tcp_listener.
+
+-export([boot/0, start_tcp_listener/2, start_ssl_listener/3,
+ stop_tcp_listener/1, on_node_down/1, active_listeners/0,
+ node_listeners/1, node_client_listeners/1,
+ register_connection/1, unregister_connection/1,
+ register_non_amqp_connection/1, unregister_non_amqp_connection/1,
+ connections/0, non_amqp_connections/0, connection_info_keys/0,
+ connection_info/1, connection_info/2,
+ connection_info_all/0, connection_info_all/1,
+ emit_connection_info_all/4, emit_connection_info_local/3,
+ close_connection/2, close_connections/2, close_all_connections/1,
+ force_connection_event_refresh/1, force_non_amqp_connection_event_refresh/1,
+ handshake/2, tcp_host/1,
+ ranch_ref/1, ranch_ref/2, ranch_ref_of_protocol/1,
+ listener_of_protocol/1, stop_ranch_listener_of_protocol/1]).
+
+%% Used by TCP-based transports, e.g. STOMP adapter
+-export([tcp_listener_addresses/1, tcp_listener_spec/9,
+ ensure_ssl/0, fix_ssl_options/1, poodle_check/1]).
+
+-export([tcp_listener_started/4, tcp_listener_stopped/4]).
+
+-deprecated([{force_connection_event_refresh, 1, eventually}]).
+
+-export([
+ local_connections/0,
+ local_non_amqp_connections/0,
+ %% prefer local_connections/0
+ connections_local/0
+]).
+
+-include("rabbit.hrl").
+-include("rabbit_misc.hrl").
+
+%% IANA-suggested ephemeral port range is 49152 to 65535
+-define(FIRST_TEST_BIND_PORT, 49152).
+
+%%----------------------------------------------------------------------------
+
+-export_type([ip_port/0, hostname/0]).
+
+-type hostname() :: rabbit_net:hostname().
+-type ip_port() :: rabbit_net:ip_port().
+
+-type family() :: atom().
+-type listener_config() :: ip_port() |
+ {hostname(), ip_port()} |
+ {hostname(), ip_port(), family()}.
+-type address() :: {inet:ip_address(), ip_port(), family()}.
+-type name_prefix() :: atom().
+-type protocol() :: atom().
+-type label() :: string().
+
+-spec boot() -> 'ok' | no_return().
+
+%% Boot step: record the Erlang distribution listener, start Ranch, then
+%% start all configured AMQP 0-9-1 TCP and TLS listeners. Listener start
+%% failures are thrown (by boot_listeners/3), aborting the boot.
+boot() ->
+ ok = record_distribution_listener(),
+ _ = application:start(ranch),
+ rabbit_log:debug("Started Ranch"),
+ %% Failures will throw exceptions
+ _ = boot_listeners(fun boot_tcp/1, application:get_env(rabbit, num_tcp_acceptors, 10), "TCP"),
+ _ = boot_listeners(fun boot_tls/1, application:get_env(rabbit, num_ssl_acceptors, 10), "TLS"),
+ ok.
+
+%% Run a listener-starting fun; log and throw on failure. Type is only a
+%% human-readable tag ("TCP" / "TLS") for the error message.
+boot_listeners(Fun, NumAcceptors, Type) ->
+ case Fun(NumAcceptors) of
+ ok ->
+ ok;
+ {error, {could_not_start_listener, Address, Port, Details}} = Error ->
+ rabbit_log:error("Failed to start ~s listener [~s]:~p, error: ~p",
+ [Type, Address, Port, Details]),
+ throw(Error)
+ end.
+
+%% Start every configured TCP listener, stopping at the first error.
+boot_tcp(NumAcceptors) ->
+ {ok, TcpListeners} = application:get_env(tcp_listeners),
+ case lists:foldl(fun(Listener, ok) ->
+ start_tcp_listener(Listener, NumAcceptors);
+ (_Listener, Error) ->
+ Error
+ end,
+ ok, TcpListeners) of
+ ok -> ok;
+ {error, _} = Error -> Error
+ end.
+
+%% Start configured TLS listeners unless the POODLE check forbids it.
+%% NOTE(review): results of start_ssl_listener/3 are discarded here, so a
+%% TLS listener failure does not propagate the way boot_tcp/1 errors do —
+%% confirm this is intentional.
+boot_tls(NumAcceptors) ->
+ case application:get_env(ssl_listeners) of
+ {ok, []} ->
+ ok;
+ {ok, SslListeners} ->
+ SslOpts = ensure_ssl(),
+ case poodle_check('AMQP') of
+ ok -> [start_ssl_listener(L, SslOpts, NumAcceptors) || L <- SslListeners];
+ danger -> ok
+ end,
+ ok
+ end.
+
+-spec ensure_ssl() -> rabbit_types:infos().
+
+%% Start the applications TLS depends on and return the fixed-up
+%% ssl_options from the rabbit app environment.
+ensure_ssl() ->
+ {ok, SslAppsConfig} = application:get_env(rabbit, ssl_apps),
+ ok = app_utils:start_applications(SslAppsConfig),
+ {ok, SslOptsConfig0} = application:get_env(rabbit, ssl_options),
+ rabbit_ssl_options:fix(SslOptsConfig0).
+
+-spec poodle_check(atom()) -> 'ok' | 'danger'.
+
+%% Refuse to start TLS listeners ('danger') on ssl app versions older than
+%% 5.3 (OTP R16B01), which cannot disable SSLv3 (OTP-10905 / POODLE),
+%% unless ssl_allow_poodle_attack is explicitly set to true.
+poodle_check(Context) ->
+ {ok, Vsn} = application:get_key(ssl, vsn),
+ case rabbit_misc:version_compare(Vsn, "5.3", gte) of %% R16B01
+ true -> ok;
+ false -> case application:get_env(rabbit, ssl_allow_poodle_attack) of
+ {ok, true} -> ok;
+ _ -> log_poodle_fail(Context),
+ danger
+ end
+ end.
+
+%% Explain in the log why TLS listeners were disabled by poodle_check/1.
+log_poodle_fail(Context) ->
+ rabbit_log:error(
+ "The installed version of Erlang (~s) contains the bug OTP-10905,~n"
+ "which makes it impossible to disable SSLv3. This makes the system~n"
+ "vulnerable to the POODLE attack. SSL listeners for ~s have therefore~n"
+ "been disabled.~n~n"
+ "You are advised to upgrade to a recent Erlang version; R16B01 is the~n"
+ "first version in which this bug is fixed, but later is usually~n"
+ "better.~n~n"
+ "If you cannot upgrade now and want to re-enable SSL listeners, you can~n"
+ "set the config item 'ssl_allow_poodle_attack' to 'true' in the~n"
+ "'rabbit' section of your configuration file.~n",
+ [rabbit_misc:otp_release(), Context]).
+
+%% Kept for callers outside this module; delegates to rabbit_ssl_options.
+fix_ssl_options(Config) ->
+ rabbit_ssl_options:fix(Config).
+
+-spec tcp_listener_addresses(listener_config()) -> [address()].
+
+%% Normalise a listener config (port, {host, port} or {host, port, family})
+%% into a list of {IPAddress, Port, Family} triples, resolving hostnames
+%% and IP-stack families as needed. Throws on an out-of-range port.
+tcp_listener_addresses(Port) when is_integer(Port) ->
+ tcp_listener_addresses_auto(Port);
+tcp_listener_addresses({"auto", Port}) ->
+ %% Variant to prevent lots of hacking around in bash and batch files
+ tcp_listener_addresses_auto(Port);
+tcp_listener_addresses({Host, Port}) ->
+ %% auto: determine family IPv4 / IPv6 after converting to IP address
+ tcp_listener_addresses({Host, Port, auto});
+tcp_listener_addresses({Host, Port, Family0})
+ when is_integer(Port) andalso (Port >= 0) andalso (Port =< 65535) ->
+ [{IPAddress, Port, Family} ||
+ {IPAddress, Family} <- getaddr(Host, Family0)];
+tcp_listener_addresses({_Host, Port, _Family0}) ->
+ rabbit_log:error("invalid port ~p - not 0..65535~n", [Port]),
+ throw({error, {invalid_port, Port}}).
+
+%% Port-only config: expand to wildcard listeners per the machine's
+%% IPv4/IPv6 stack configuration (see port_to_listeners/1).
+tcp_listener_addresses_auto(Port) ->
+ lists:append([tcp_listener_addresses(Listener) ||
+ Listener <- port_to_listeners(Port)]).
+
+-spec tcp_listener_spec
+ (name_prefix(), address(), [gen_tcp:listen_option()], module(), module(),
+ any(), protocol(), non_neg_integer(), label()) ->
+ supervisor:child_spec().
+
+%% Build the supervisor child spec for one tcp_listener_sup, wiring this
+%% module's tcp_listener_started/stopped callbacks into it.
+tcp_listener_spec(NamePrefix, {IPAddress, Port, Family}, SocketOpts,
+ Transport, ProtoSup, ProtoOpts, Protocol, NumAcceptors, Label) ->
+ Args = [IPAddress, Port, Transport, [Family | SocketOpts], ProtoSup, ProtoOpts,
+ {?MODULE, tcp_listener_started, [Protocol, SocketOpts]},
+ {?MODULE, tcp_listener_stopped, [Protocol, SocketOpts]},
+ NumAcceptors, Label],
+ {rabbit_misc:tcp_name(NamePrefix, IPAddress, Port),
+ {tcp_listener_sup, start_link, Args},
+ transient, infinity, supervisor, [tcp_listener_sup]}.
+
+-spec ranch_ref(#listener{} | [{atom(), any()}] | 'undefined') -> ranch:ref() | undefined.
+%% Derive the Ranch listener ref from a #listener{} record, a proplist
+%% with a port key, or pass 'undefined' through. Uses the first address
+%% the port resolves to.
+ranch_ref(#listener{port = Port}) ->
+ [{IPAddress, Port, _Family} | _] = tcp_listener_addresses(Port),
+ {acceptor, IPAddress, Port};
+ranch_ref(Listener) when is_list(Listener) ->
+ Port = rabbit_misc:pget(port, Listener),
+ [{IPAddress, Port, _Family} | _] = tcp_listener_addresses(Port),
+ {acceptor, IPAddress, Port};
+ranch_ref(undefined) ->
+ undefined.
+
+-spec ranch_ref(inet:ip_address(), ip_port()) -> ranch:ref().
+
+%% Returns a reference that identifies a TCP listener in Ranch.
+ranch_ref(IPAddress, Port) ->
+ {acceptor, IPAddress, Port}.
+
+-spec ranch_ref_of_protocol(atom()) -> ranch:ref() | undefined.
+%% Ranch ref of this node's listener for Protocol, or 'undefined' if no
+%% such listener is recorded.
+ranch_ref_of_protocol(Protocol) ->
+ ranch_ref(listener_of_protocol(Protocol)).
+
+%% Look up this node's #listener{} row for Protocol in the rabbit_listener
+%% Mnesia table. Returns 'undefined' when no row matches, so the spec must
+%% include that alternative (the [] clause below returns it, and callers —
+%% ranch_ref/1, ranch_ref_of_protocol/1 — already handle 'undefined').
+-spec listener_of_protocol(atom()) -> #listener{} | 'undefined'.
+listener_of_protocol(Protocol) ->
+ rabbit_misc:execute_mnesia_transaction(
+ fun() ->
+ MatchSpec = #listener{
+ node = node(),
+ protocol = Protocol,
+ _ = '_'
+ },
+ case mnesia:match_object(rabbit_listener, MatchSpec, read) of
+ [] -> undefined;
+ [Row] -> Row
+ end
+ end).
+
+-spec stop_ranch_listener_of_protocol(atom()) -> ok | {error, not_found}.
+%% Stop the Ranch listener backing Protocol, if one is recorded for this
+%% node; a missing listener is treated as success.
+stop_ranch_listener_of_protocol(Protocol) ->
+ case rabbit_networking:ranch_ref_of_protocol(Protocol) of
+ undefined -> ok;
+ Ref ->
+ rabbit_log:debug("Stopping Ranch listener for protocol ~s", [Protocol]),
+ ranch:stop_listener(Ref)
+ end.
+
+-spec start_tcp_listener(
+ listener_config(), integer()) -> 'ok' | {'error', term()}.
+
+%% Start plain AMQP 0-9-1 listener(s) for the given config.
+start_tcp_listener(Listener, NumAcceptors) ->
+ start_listener(Listener, NumAcceptors, amqp, "TCP listener", tcp_opts()).
+
+-spec start_ssl_listener(
+ listener_config(), rabbit_types:infos(), integer()) -> 'ok' | {'error', term()}.
+
+%% Start TLS AMQP 0-9-1 listener(s); SslOpts are appended to the TCP opts.
+start_ssl_listener(Listener, SslOpts, NumAcceptors) ->
+ start_listener(Listener, NumAcceptors, 'amqp/ssl', "TLS (SSL) listener", tcp_opts() ++ SslOpts).
+
+
+-spec start_listener(
+ listener_config(), integer(), protocol(), label(), list()) -> 'ok' | {'error', term()}.
+%% Start one listener per resolved address, stopping at the first error.
+start_listener(Listener, NumAcceptors, Protocol, Label, Opts) ->
+ lists:foldl(fun (Address, ok) ->
+ start_listener0(Address, NumAcceptors, Protocol, Label, Opts);
+ (_Address, {error, _} = Error) ->
+ Error
+ end, ok, tcp_listener_addresses(Listener)).
+
+%% Start a single tcp_listener_sup child under rabbit_sup, translating the
+%% two failure shapes (nested listen_error, or anything else) into a
+%% uniform {error, {could_not_start_listener, ...}} tuple.
+start_listener0(Address, NumAcceptors, Protocol, Label, Opts) ->
+ Transport = transport(Protocol),
+ Spec = tcp_listener_spec(rabbit_tcp_listener_sup, Address, Opts,
+ Transport, rabbit_connection_sup, [], Protocol,
+ NumAcceptors, Label),
+ case supervisor:start_child(rabbit_sup, Spec) of
+ {ok, _} -> ok;
+ {error, {{shutdown, {failed_to_start_child, _,
+ {shutdown, {failed_to_start_child, _,
+ {listen_error, _, PosixError}}}}}, _}} ->
+ {IPAddress, Port, _Family} = Address,
+ {error, {could_not_start_listener, rabbit_misc:ntoa(IPAddress), Port, PosixError}};
+ {error, Other} ->
+ {IPAddress, Port, _Family} = Address,
+ {error, {could_not_start_listener, rabbit_misc:ntoa(IPAddress), Port, Other}}
+ end.
+
+%% Ranch transport module for each supported protocol.
+transport(Protocol) ->
+ case Protocol of
+ amqp -> ranch_tcp;
+ 'amqp/ssl' -> ranch_ssl
+ end.
+
+-spec stop_tcp_listener(listener_config()) -> 'ok'.
+
+%% Stop and remove the listener supervisor(s) for every address the
+%% config resolves to.
+stop_tcp_listener(Listener) ->
+ [stop_tcp_listener0(Address) ||
+ Address <- tcp_listener_addresses(Listener)],
+ ok.
+
+stop_tcp_listener0({IPAddress, Port, _Family}) ->
+ Name = rabbit_misc:tcp_name(rabbit_tcp_listener_sup, IPAddress, Port),
+ ok = supervisor:terminate_child(rabbit_sup, Name),
+ ok = supervisor:delete_child(rabbit_sup, Name).
+
+-spec tcp_listener_started
+ (_, _,
+ string() |
+ {byte(),byte(),byte(),byte()} |
+ {char(),char(),char(),char(),char(),char(),char(),char()}, _) ->
+ 'ok'.
+
+%% Record a started listener in the rabbit_listener Mnesia table (dirty
+%% write; called from the tcp_listener_sup callbacks installed by
+%% tcp_listener_spec/9).
+tcp_listener_started(Protocol, Opts, IPAddress, Port) ->
+ %% We need the ip to distinguish e.g. 0.0.0.0 and 127.0.0.1
+ %% We need the host so we can distinguish multiple instances of the above
+ %% in a cluster.
+ ok = mnesia:dirty_write(
+ rabbit_listener,
+ #listener{node = node(),
+ protocol = Protocol,
+ host = tcp_host(IPAddress),
+ ip_address = IPAddress,
+ port = Port,
+ opts = Opts}).
+
+-spec tcp_listener_stopped
+ (_, _,
+ string() |
+ {byte(),byte(),byte(),byte()} |
+ {char(),char(),char(),char(),char(),char(),char(),char()},
+ _) ->
+ 'ok'.
+
+%% Remove the matching listener row written by tcp_listener_started/4.
+tcp_listener_stopped(Protocol, Opts, IPAddress, Port) ->
+ ok = mnesia:dirty_delete_object(
+ rabbit_listener,
+ #listener{node = node(),
+ protocol = Protocol,
+ host = tcp_host(IPAddress),
+ ip_address = IPAddress,
+ port = Port,
+ opts = Opts}).
+
+-spec record_distribution_listener() -> ok | no_return().
+
+%% Ask epmd for this node's distribution port and record it as a
+%% 'clustering' listener bound to the IPv6 wildcard; throws if epmd does
+%% not know the port.
+record_distribution_listener() ->
+ {Name, Host} = rabbit_nodes:parts(node()),
+ case erl_epmd:port_please(list_to_atom(Name), Host, infinity) of
+ {port, Port, _Version} ->
+ tcp_listener_started(clustering, [], {0,0,0,0,0,0,0,0}, Port);
+ noport ->
+ throw({error, no_epmd_port})
+ end.
+
+-spec active_listeners() -> [rabbit_types:listener()].
+
+%% All listener rows across the cluster (dirty read of rabbit_listener).
+active_listeners() ->
+ rabbit_misc:dirty_read_all(rabbit_listener).
+
+-spec node_listeners(node()) -> [rabbit_types:listener()].
+
+node_listeners(Node) ->
+ mnesia:dirty_read(rabbit_listener, Node).
+
+-spec node_client_listeners(node()) -> [rabbit_types:listener()].
+
+%% Node's listeners minus the inter-node 'clustering' (distribution) one.
+node_client_listeners(Node) ->
+ case node_listeners(Node) of
+ [] -> [];
+ Xs ->
+ lists:filter(fun (#listener{protocol = clustering}) -> false;
+ (_) -> true
+ end, Xs)
+ end.
+
+-spec on_node_down(node()) -> 'ok'.
+
+%% Drop a downed node's listener rows — unless the node has already
+%% reconnected (is in nodes() again), in which case keep them.
+on_node_down(Node) ->
+ case lists:member(Node, nodes()) of
+ false ->
+ rabbit_log:info(
+ "Node ~s is down, deleting its listeners~n", [Node]),
+ ok = mnesia:dirty_delete(rabbit_listener, Node);
+ true ->
+ rabbit_log:info(
+ "Keeping ~s listeners: the node is already back~n", [Node])
+ end.
+
+-spec register_connection(pid()) -> ok.
+
+%% Track an AMQP connection pid in the local pg_local group.
+register_connection(Pid) -> pg_local:join(rabbit_connections, Pid).
+
+-spec unregister_connection(pid()) -> ok.
+
+unregister_connection(Pid) -> pg_local:leave(rabbit_connections, Pid).
+
+-spec connections() -> [rabbit_types:connection()].
+
+%% Connection pids from every running cluster node (RPC fan-out to
+%% connections_local/0 on each node).
+connections() ->
+ Nodes = rabbit_nodes:all_running(),
+ rabbit_misc:append_rpc_all_nodes(Nodes, rabbit_networking, connections_local, [], ?RPC_TIMEOUT).
+
+-spec local_connections() -> [rabbit_types:connection()].
+%% @doc Returns pids of AMQP 0-9-1 and AMQP 1.0 connections local to this node.
+local_connections() ->
+ connections_local().
+
+-spec connections_local() -> [rabbit_types:connection()].
+%% @deprecated Prefer {@link local_connections}
+connections_local() -> pg_local:get_members(rabbit_connections).
+
+-spec register_non_amqp_connection(pid()) -> ok.
+
+%% Same registry pattern as above, for non-AMQP (plugin protocol) connections.
+register_non_amqp_connection(Pid) -> pg_local:join(rabbit_non_amqp_connections, Pid).
+
+-spec unregister_non_amqp_connection(pid()) -> ok.
+
+unregister_non_amqp_connection(Pid) -> pg_local:leave(rabbit_non_amqp_connections, Pid).
+
+-spec non_amqp_connections() -> [rabbit_types:connection()].
+
+non_amqp_connections() ->
+ Nodes = rabbit_nodes:all_running(),
+ rabbit_misc:append_rpc_all_nodes(Nodes, rabbit_networking, local_non_amqp_connections, [], ?RPC_TIMEOUT).
+
+-spec local_non_amqp_connections() -> [rabbit_types:connection()].
+local_non_amqp_connections() ->
+ pg_local:get_members(rabbit_non_amqp_connections).
+
+-spec connection_info_keys() -> rabbit_types:info_keys().
+
+connection_info_keys() -> rabbit_reader:info_keys().
+
+-spec connection_info(rabbit_types:connection()) -> rabbit_types:infos().
+
+%% Info items for one connection, delegated to its reader process.
+connection_info(Pid) -> rabbit_reader:info(Pid).
+
+-spec connection_info(rabbit_types:connection(), rabbit_types:info_keys()) ->
+ rabbit_types:infos().
+
+connection_info(Pid, Items) -> rabbit_reader:info(Pid, Items).
+
+-spec connection_info_all() -> [rabbit_types:infos()].
+
+%% Info for all cluster connections; cmap/1 skips connections that exit
+%% mid-query.
+connection_info_all() -> cmap(fun (Q) -> connection_info(Q) end).
+
+-spec connection_info_all(rabbit_types:info_keys()) ->
+ [rabbit_types:infos()].
+
+connection_info_all(Items) -> cmap(fun (Q) -> connection_info(Q, Items) end).
+
+%% Spawn an emitter per node and wait for all of them to finish; used by
+%% CLI tooling to stream connection info to AggregatorPid.
+emit_connection_info_all(Nodes, Items, Ref, AggregatorPid) ->
+ Pids = [ spawn_link(Node, rabbit_networking, emit_connection_info_local, [Items, Ref, AggregatorPid]) || Node <- Nodes ],
+ rabbit_control_misc:await_emitters_termination(Pids),
+ ok.
+
+%% Emit info for this node's connections, tolerating pids that exit.
+emit_connection_info_local(Items, Ref, AggregatorPid) ->
+ rabbit_control_misc:emitting_map_with_exit_handler(
+ AggregatorPid, Ref, fun(Q) -> connection_info(Q, Items) end,
+ connections_local()).
+
+-spec close_connection(pid(), string()) -> 'ok'.
+
+%% Ask a reader to shut down with Explanation, but only if some running
+%% cluster node still reports the pid as an active connection; otherwise
+%% just log a warning and succeed.
+close_connection(Pid, Explanation) ->
+ case lists:member(Pid, connections()) of
+ true ->
+ Res = rabbit_reader:shutdown(Pid, Explanation),
+ rabbit_log:info("Closing connection ~p because ~p~n", [Pid, Explanation]),
+ Res;
+ false ->
+ rabbit_log:warning("Asked to close connection ~p (reason: ~p) "
+ "but no running cluster node reported it as an active connection. Was it already closed? ~n",
+ [Pid, Explanation]),
+ ok
+ end.
+
+-spec close_connections([pid()], string()) -> 'ok'.
+close_connections(Pids, Explanation) ->
+ [close_connection(Pid, Explanation) || Pid <- Pids],
+ ok.
+
+%% Meant to be used by tests only
+-spec close_all_connections(string()) -> 'ok'.
+close_all_connections(Explanation) ->
+ Pids = connections(),
+ [close_connection(Pid, Explanation) || Pid <- Pids],
+ ok.
+
+-spec force_connection_event_refresh(reference()) -> 'ok'.
+%% Deprecated (see -deprecated attribute above the exports): forces every
+%% AMQP reader to re-emit its connection event.
+force_connection_event_refresh(Ref) ->
+ [rabbit_reader:force_event_refresh(C, Ref) || C <- connections()],
+ ok.
+
+-spec force_non_amqp_connection_event_refresh(reference()) -> 'ok'.
+%% Non-AMQP connections are plain gen_servers; refresh via cast.
+force_non_amqp_connection_event_refresh(Ref) ->
+ [gen_server:cast(Pid, {force_event_refresh, Ref}) || Pid <- non_amqp_connections()],
+ ok.
+
+-spec failed_to_recv_proxy_header(_, _) -> no_return().
+%% Log the proxy-header failure, let Ranch clean up the socket via
+%% handshake/1, then exit the acceptor process.
+failed_to_recv_proxy_header(Ref, Error) ->
+ Msg = case Error of
+ closed -> "error when receiving proxy header: TCP socket was ~p prematurely";
+ _Other -> "error when receiving proxy header: ~p"
+ end,
+ rabbit_log:debug(Msg, [Error]),
+ % The following call will clean up resources then exit
+ _ = ranch:handshake(Ref),
+ exit({shutdown, failed_to_recv_proxy_header}).
+
+%% Complete the Ranch handshake for an accepted connection. When proxy
+%% protocol is enabled, first read the proxy header (3s timeout) and wrap
++%% the socket as {rabbit_proxy_socket, Sock, ProxyInfo}; otherwise return
+%% the bare socket. Either way the socket is tuned via setup_socket/1.
+handshake(Ref, ProxyProtocolEnabled) ->
+ case ProxyProtocolEnabled of
+ true ->
+ case ranch:recv_proxy_header(Ref, 3000) of
+ {error, Error} ->
+ failed_to_recv_proxy_header(Ref, Error);
+ {error, protocol_error, Error} ->
+ failed_to_recv_proxy_header(Ref, Error);
+ {ok, ProxyInfo} ->
+ {ok, Sock} = ranch:handshake(Ref),
+ setup_socket(Sock),
+ {ok, {rabbit_proxy_socket, Sock, ProxyInfo}}
+ end;
+ false ->
+ {ok, Sock} = ranch:handshake(Ref),
+ setup_socket(Sock),
+ {ok, Sock}
+ end.
+
+%% Tune buffers and account for the fd with file_handle_cache.
+setup_socket(Sock) ->
+ ok = tune_buffer_size(Sock),
+ ok = file_handle_cache:obtain().
+
+%% On tuning failure the socket is closed fast and the process exits
+%% 'normal' (nothing useful can be done with a broken socket).
+tune_buffer_size(Sock) ->
+ case tune_buffer_size1(Sock) of
+ ok -> ok;
+ {error, _} -> rabbit_net:fast_close(Sock),
+ exit(normal)
+ end.
+
+%% Set the user-space 'buffer' to the max of sndbuf/recbuf/buffer so it
+%% is never smaller than the kernel buffers.
+tune_buffer_size1(Sock) ->
+ case rabbit_net:getopts(Sock, [sndbuf, recbuf, buffer]) of
+ {ok, BufSizes} -> BufSz = lists:max([Sz || {_Opt, Sz} <- BufSizes]),
+ rabbit_net:setopts(Sock, [{buffer, BufSz}]);
+ Error -> Error
+ end.
+
+%%--------------------------------------------------------------------
+
+%% Hostname string for an IP address, delegated to rabbit_net.
+tcp_host(IPAddress) ->
+ rabbit_net:tcp_host(IPAddress).
+
+%% Map F over all cluster connections, dropping any that exit mid-call.
+cmap(F) -> rabbit_misc:filter_exit_map(F, connections()).
+
+%% Base TCP listen options from the rabbit app environment.
+tcp_opts() ->
+ {ok, ConfigOpts} = application:get_env(rabbit, tcp_listen_options),
+ ConfigOpts.
+
+%% inet_parse:address takes care of ip string, like "0.0.0.0"
+%% inet:getaddr returns immediately for ip tuple {0,0,0,0},
+%% and runs 'inet_gethost' port process for dns lookups.
+%% On Windows inet:getaddr runs dns resolver for ip string, which may fail.
+%% Resolve Host into one or more {IPAddress, Family} pairs: literal IP
+%% strings are parsed directly, anything else goes through DNS.
+getaddr(Host, Family) ->
+ case inet_parse:address(Host) of
+ {ok, IPAddress} -> [{IPAddress, resolve_family(IPAddress, Family)}];
+ {error, _} -> gethostaddr(Host, Family)
+ end.
+
+%% With 'auto', try both inet and inet6 lookups and return every success;
+%% throw (via host_lookup_error/2) only if both fail.
+gethostaddr(Host, auto) ->
+ Lookups = [{Family, inet:getaddr(Host, Family)} || Family <- [inet, inet6]],
+ case [{IP, Family} || {Family, {ok, IP}} <- Lookups] of
+ [] -> host_lookup_error(Host, Lookups);
+ IPs -> IPs
+ end;
+
+gethostaddr(Host, Family) ->
+ case inet:getaddr(Host, Family) of
+ {ok, IPAddress} -> [{IPAddress, Family}];
+ {error, Reason} -> host_lookup_error(Host, Reason)
+ end.
+
+-spec host_lookup_error(_, _) -> no_return().
+host_lookup_error(Host, Reason) ->
+ rabbit_log:error("invalid host ~p - ~p~n", [Host, Reason]),
+ throw({error, {invalid_host, Host, Reason}}).
+
+%% Infer the address family from the tuple size of a parsed IP literal.
+resolve_family({_,_,_,_}, auto) -> inet;
+resolve_family({_,_,_,_,_,_,_,_}, auto) -> inet6;
+resolve_family(IP, auto) -> throw({error, {strange_family, IP}});
+resolve_family(_, F) -> F.
+
+%%--------------------------------------------------------------------
+
+%% There are three kinds of machine (for our purposes).
+%%
+%% * Those which treat IPv4 addresses as a special kind of IPv6 address
+%% ("Single stack")
+%% - Linux by default, Windows Vista and later
+%% - We also treat any (hypothetical?) IPv6-only machine the same way
+%% * Those which consider IPv6 and IPv4 to be completely separate things
+%% ("Dual stack")
+%% - OpenBSD, Windows XP / 2003, Linux if so configured
+%% * Those which do not support IPv6.
+%% - Ancient/weird OSes, Linux if so configured
+%%
+%% How to reconfigure Linux to test this:
+%% Single stack (default):
+%% echo 0 > /proc/sys/net/ipv6/bindv6only
+%% Dual stack:
+%% echo 1 > /proc/sys/net/ipv6/bindv6only
+%% IPv4 only:
+%% add ipv6.disable=1 to GRUB_CMDLINE_LINUX_DEFAULT in /etc/default/grub then
+%% sudo update-grub && sudo reboot
+%%
+%% This matters in (and only in) the case where the sysadmin (or the
+%% app descriptor) has only supplied a port and we wish to bind to
+%% "all addresses". This means different things depending on whether
+%% we're single or dual stack. On single stack binding to "::"
+%% implicitly includes all IPv4 addresses, and subsequently attempting
+%% to bind to "0.0.0.0" will fail. On dual stack, binding to "::" will
+%% only bind to IPv6 addresses, and we need another listener bound to
+%% "0.0.0.0" for IPv4. Finally, on IPv4-only systems we of course only
+%% want to bind to "0.0.0.0".
+%%
+%% Unfortunately it seems there is no way to detect single vs dual stack
+%% apart from attempting to bind to the port.
+%% Expand a bare port into wildcard listener configs appropriate for the
+%% machine's IP stack (see the long comment above for the three cases).
+port_to_listeners(Port) ->
+ IPv4 = {"0.0.0.0", Port, inet},
+ IPv6 = {"::", Port, inet6},
+ case ipv6_status(?FIRST_TEST_BIND_PORT) of
+ single_stack -> [IPv6];
+ ipv6_only -> [IPv6];
+ dual_stack -> [IPv6, IPv4];
+ ipv4_only -> [IPv4]
+ end.
+
+%% Probe the IP stack by actually binding test sockets (there is no other
+%% reliable way to distinguish single vs dual stack). Retries on the next
+%% port when the test port is in use.
+ipv6_status(TestPort) ->
+ IPv4 = [inet, {ip, {0,0,0,0}}],
+ IPv6 = [inet6, {ip, {0,0,0,0,0,0,0,0}}],
+ case gen_tcp:listen(TestPort, IPv6) of
+ {ok, LSock6} ->
+ case gen_tcp:listen(TestPort, IPv4) of
+ {ok, LSock4} ->
+ %% Dual stack
+ gen_tcp:close(LSock6),
+ gen_tcp:close(LSock4),
+ dual_stack;
+ %% Checking the error here would only let us
+ %% distinguish single stack IPv6 / IPv4 vs IPv6 only,
+ %% which we figure out below anyway.
+ {error, _} ->
+ gen_tcp:close(LSock6),
+ case gen_tcp:listen(TestPort, IPv4) of
+ %% Single stack
+ {ok, LSock4} -> gen_tcp:close(LSock4),
+ single_stack;
+ %% IPv6-only machine. Welcome to the future.
+ {error, eafnosupport} -> ipv6_only; %% Linux
+ {error, eprotonosupport}-> ipv6_only; %% FreeBSD
+ %% Dual stack machine with something already
+ %% on IPv4.
+ {error, _} -> ipv6_status(TestPort + 1)
+ end
+ end;
+ %% IPv4-only machine. Welcome to the 90s.
+ {error, eafnosupport} -> %% Linux
+ ipv4_only;
+ {error, eprotonosupport} -> %% FreeBSD
+ ipv4_only;
+ %% Port in use
+ {error, _} ->
+ ipv6_status(TestPort + 1)
+ end.
diff --git a/deps/rabbit/src/rabbit_node_monitor.erl b/deps/rabbit/src/rabbit_node_monitor.erl
new file mode 100644
index 0000000000..b56180c54c
--- /dev/null
+++ b/deps/rabbit/src/rabbit_node_monitor.erl
@@ -0,0 +1,926 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_node_monitor).
+
+%% Transitional step until we can require Erlang/OTP 21 and
+%% use the now recommended try/catch syntax for obtaining the stack trace.
+-compile(nowarn_deprecated_function).
+
+-behaviour(gen_server).
+
+-export([start_link/0]).
+-export([running_nodes_filename/0,
+ cluster_status_filename/0, quorum_filename/0, default_quorum_filename/0,
+ prepare_cluster_status_files/0,
+ write_cluster_status/1, read_cluster_status/0,
+ update_cluster_status/0, reset_cluster_status/0]).
+-export([notify_node_up/0, notify_joined_cluster/0, notify_left_cluster/1]).
+-export([partitions/0, partitions/1, status/1, subscribe/1]).
+-export([pause_partition_guard/0]).
+-export([global_sync/0]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+ %% Utils
+-export([all_rabbit_nodes_up/0, run_outside_applications/2, ping_all/0,
+ alive_nodes/1, alive_rabbit_nodes/1]).
+
+-define(SERVER, ?MODULE).
+-define(NODE_REPLY_TIMEOUT, 5000).
+-define(RABBIT_UP_RPC_TIMEOUT, 2000).
+-define(RABBIT_DOWN_PING_INTERVAL, 1000).
+
+-record(state, {monitors, partitions, subscribers, down_ping_timer,
+ keepalive_timer, autoheal, guid, node_guids}).
+
+%%----------------------------------------------------------------------------
+%% Start
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+%% Start the singleton node monitor, registered locally as ?SERVER.
+start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
+
+%%----------------------------------------------------------------------------
+%% Cluster file operations
+%%----------------------------------------------------------------------------
+
+%% The cluster file information is kept in two files. The "cluster
+%% status file" contains all the clustered nodes and the disc nodes.
+%% The "running nodes file" contains the currently running nodes or
+%% the running nodes at shutdown when the node is down.
+%%
+%% We strive to keep the files up to date and we rely on this
+%% assumption in various situations. Obviously when mnesia is offline
+%% the information we have will be outdated, but it cannot be
+%% otherwise.
+
+-spec running_nodes_filename() -> string().
+
+%% Path of the "running nodes" file, under the Mnesia directory.
+running_nodes_filename() ->
+ filename:join(rabbit_mnesia:dir(), "nodes_running_at_shutdown").
+
+-spec cluster_status_filename() -> string().
+
+%% Path of the "cluster status" file, under the Mnesia directory.
+cluster_status_filename() ->
+ filename:join(rabbit_mnesia:dir(), "cluster_nodes.config").
+
+%% Quorum queue data directory, as configured for Ra.
+quorum_filename() ->
+ ra_env:data_dir().
+
+%% Default quorum data directory when none is configured.
+default_quorum_filename() ->
+ filename:join(rabbit_mnesia:dir(), "quorum").
+
+-spec prepare_cluster_status_files() -> 'ok' | no_return().
+
+%% Ensure both status files exist and are in the current format, migrating
+%% legacy single-list files and always including this node. Throws via
+%% corrupt_cluster_status_files/1 on unrecognised content.
+prepare_cluster_status_files() ->
+ rabbit_mnesia:ensure_mnesia_dir(),
+ RunningNodes1 = case try_read_file(running_nodes_filename()) of
+ {ok, [Nodes]} when is_list(Nodes) -> Nodes;
+ {ok, Other} -> corrupt_cluster_status_files(Other);
+ {error, enoent} -> []
+ end,
+ ThisNode = [node()],
+ %% The running nodes file might contain a set or a list, in case
+ %% of the legacy file
+ RunningNodes2 = lists:usort(ThisNode ++ RunningNodes1),
+ {AllNodes1, DiscNodes} =
+ case try_read_file(cluster_status_filename()) of
+ {ok, [{AllNodes, DiscNodes0}]} ->
+ {AllNodes, DiscNodes0};
+ {ok, [AllNodes0]} when is_list(AllNodes0) ->
+ %% Legacy format: one list; derive both fields from it.
+ {legacy_cluster_nodes(AllNodes0), legacy_disc_nodes(AllNodes0)};
+ {ok, Files} ->
+ corrupt_cluster_status_files(Files);
+ {error, enoent} ->
+ LegacyNodes = legacy_cluster_nodes([]),
+ {LegacyNodes, LegacyNodes}
+ end,
+ AllNodes2 = lists:usort(AllNodes1 ++ RunningNodes2),
+ ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}).
+
+-spec corrupt_cluster_status_files(any()) -> no_return().
+
+corrupt_cluster_status_files(F) ->
+ throw({error, corrupt_cluster_status_files, F}).
+
+-spec write_cluster_status(rabbit_mnesia:cluster_status()) -> 'ok'.
+
+%% Persist {AllNodes, DiscNodes} to the cluster status file and Running to
+%% the running-nodes file; throws identifying which file failed to write.
+write_cluster_status({All, Disc, Running}) ->
+ ClusterStatusFN = cluster_status_filename(),
+ Res = case rabbit_file:write_term_file(ClusterStatusFN, [{All, Disc}]) of
+ ok ->
+ RunningNodesFN = running_nodes_filename(),
+ {RunningNodesFN,
+ rabbit_file:write_term_file(RunningNodesFN, [Running])};
+ E1 = {error, _} ->
+ {ClusterStatusFN, E1}
+ end,
+ case Res of
+ {_, ok} -> ok;
+ {FN, {error, E2}} -> throw({error, {could_not_write_file, FN, E2}})
+ end.
+
+-spec read_cluster_status() -> rabbit_mnesia:cluster_status().
+
+%% Read both files back as an {All, Disc, Running} triple; throws when
+%% either file is missing or does not have the expected shape.
+read_cluster_status() ->
+ case {try_read_file(cluster_status_filename()),
+ try_read_file(running_nodes_filename())} of
+ {{ok, [{All, Disc}]}, {ok, [Running]}} when is_list(Running) ->
+ {All, Disc, Running};
+ {Stat, Run} ->
+ throw({error, {corrupt_or_missing_cluster_files, Stat, Run}})
+ end.
+
+-spec update_cluster_status() -> 'ok'.
+
+%% Refresh the status files from Mnesia's view of the cluster.
+update_cluster_status() ->
+ {ok, Status} = rabbit_mnesia:cluster_status_from_mnesia(),
+ write_cluster_status(Status).
+
+-spec reset_cluster_status() -> 'ok'.
+
+%% Reset the files to a single-node cluster containing only this node.
+reset_cluster_status() ->
+ write_cluster_status({[node()], [node()], [node()]}).
+
+%%----------------------------------------------------------------------------
+%% Cluster notifications
+%%----------------------------------------------------------------------------
+
+-spec notify_node_up() -> 'ok'.
+
+%% Tell the local node monitor this node is up (async cast).
+notify_node_up() ->
+ gen_server:cast(?SERVER, notify_node_up).
+
+-spec notify_joined_cluster() -> 'ok'.
+
+%% Broadcast to all other running nodes that this node joined, together
+%% with its node type.
+notify_joined_cluster() ->
+ Nodes = rabbit_nodes:all_running() -- [node()],
+ gen_server:abcast(Nodes, ?SERVER,
+ {joined_cluster, node(), rabbit_mnesia:node_type()}),
+ ok.
+
+-spec notify_left_cluster(node()) -> 'ok'.
+
+%% Broadcast to all running nodes that Node left the cluster.
+notify_left_cluster(Node) ->
+ Nodes = rabbit_nodes:all_running(),
+ gen_server:abcast(Nodes, ?SERVER, {left_cluster, Node}),
+ ok.
+
+%%----------------------------------------------------------------------------
+%% Server calls
+%%----------------------------------------------------------------------------
+
+-spec partitions() -> [node()].
+
+%% Nodes this node believes it is partitioned from (local server call).
+partitions() ->
+ gen_server:call(?SERVER, partitions, infinity).
+
+-spec partitions([node()]) -> [{node(), [node()]}].
+
+%% Partition view of each of Nodes; non-replying nodes are dropped by
+%% multi_call within ?NODE_REPLY_TIMEOUT.
+partitions(Nodes) ->
+ {Replies, _} = gen_server:multi_call(Nodes, ?SERVER, partitions, ?NODE_REPLY_TIMEOUT),
+ Replies.
+
+-spec status([node()]) -> {[{node(), [node()]}], [node()]}.
+
+%% {Replies, BadNodes} of a 'status' call against each of Nodes.
+status(Nodes) ->
+ gen_server:multi_call(Nodes, ?SERVER, status, infinity).
+
+-spec subscribe(pid()) -> 'ok'.
+
+%% Subscribe Pid to node monitor notifications.
+subscribe(Pid) ->
+ gen_server:cast(?SERVER, {subscribe, Pid}).
+
+%%----------------------------------------------------------------------------
+%% pause_minority/pause_if_all_down safety
+%%----------------------------------------------------------------------------
+
+%% If we are in a minority and pause_minority mode then a) we are
+%% going to shut down imminently and b) we should not confirm anything
+%% until then, since anything we confirm is likely to be lost.
+%%
+%% The same principles apply to a node which isn't part of the preferred
+%% partition when we are in pause_if_all_down mode.
+%%
+%% We could confirm something by having an HA queue see the pausing
+%% state (and fail over into it) before the node monitor stops us, or
+%% by using unmirrored queues and just having them vanish (and
+%% confirming messages as thrown away).
+%%
+%% So we have channels call in here before issuing confirms, to do a
+%% lightweight check that we have not entered a pausing state.
+
+-spec pause_partition_guard() -> 'ok' | 'pausing'.
+
+%% Lightweight check used by channels before issuing confirms: returns
+%% 'pausing' when this node is about to pause itself (see comment above).
+%% The result is memoised in the process dictionary, keyed on nodes(), so
+%% repeated calls in an unchanged cluster are cheap.
+pause_partition_guard() ->
+ case get(pause_partition_guard) of
+ not_pause_mode ->
+ ok;
+ undefined ->
+ %% First call in this process: inspect the configured
+ %% partition handling mode and cache the verdict.
+ {ok, M} = application:get_env(rabbit, cluster_partition_handling),
+ case M of
+ pause_minority ->
+ pause_minority_guard([], ok);
+ {pause_if_all_down, PreferredNodes, _} ->
+ pause_if_all_down_guard(PreferredNodes, [], ok);
+ _ ->
+ put(pause_partition_guard, not_pause_mode),
+ ok
+ end;
+ {minority_mode, Nodes, LastState} ->
+ pause_minority_guard(Nodes, LastState);
+ {pause_if_all_down_mode, PreferredNodes, Nodes, LastState} ->
+ pause_if_all_down_guard(PreferredNodes, Nodes, LastState)
+ end.
+
+%% Re-evaluate majority() only when the set of connected nodes changed;
+%% otherwise return the cached verdict.
+pause_minority_guard(LastNodes, LastState) ->
+ case nodes() of
+ LastNodes -> LastState;
+ _ -> NewState = case majority() of
+ false -> pausing;
+ true -> ok
+ end,
+ put(pause_partition_guard,
+ {minority_mode, nodes(), NewState}),
+ NewState
+ end.
+
+%% Same caching scheme, but the verdict is whether we are inside the
+%% preferred partition.
+pause_if_all_down_guard(PreferredNodes, LastNodes, LastState) ->
+ case nodes() of
+ LastNodes -> LastState;
+ _ -> NewState = case in_preferred_partition(PreferredNodes) of
+ false -> pausing;
+ true -> ok
+ end,
+ put(pause_partition_guard,
+ {pause_if_all_down_mode, PreferredNodes, nodes(),
+ NewState}),
+ NewState
+ end.
+
+%%----------------------------------------------------------------------------
+%% "global" hang workaround.
+%%----------------------------------------------------------------------------
+
+%% This code works around a possible inconsistency in the "global"
+%% state, causing global:sync/0 to never return.
+%%
+%% 1. A process is spawned.
+%% 2. If after 10", global:sync() didn't return, the "global"
+%% state is parsed.
+%% 3. If it detects that a sync is blocked for more than 10",
+%% the process sends fake nodedown/nodeup events to the two
+%% nodes involved (one local, one remote).
+%% 4. Both "global" instances restart their synchronisation.
+%% 5. global:sync() finally returns.
+%%
+%% FIXME: Remove this workaround, once we got rid of the change to
+%% "dist_auto_connect" and fixed the bugs uncovered.
+
+%% Run global:sync/0 with the hang workaround: spawn the watchdog
+%% first, then sync, then tell the watchdog the sync completed so it
+%% can exit.
+global_sync() ->
+ Pid = spawn(fun workaround_global_hang/0),
+ ok = global:sync(),
+ Pid ! global_sync_done,
+ ok.
+
+%% Watchdog body: if global:sync/0 has not reported completion within
+%% 10s, start looking for blocked "global" sync peers.
+workaround_global_hang() ->
+ receive
+ global_sync_done ->
+ ok
+ after 10000 ->
+ find_blocked_global_peers()
+ end.
+
+%% Snapshot the global name server's sync entries twice, 10s apart;
+%% any entry present in both snapshots is considered stuck.
+find_blocked_global_peers() ->
+ Snapshot1 = snapshot_global_dict(),
+ timer:sleep(10000),
+ Snapshot2 = snapshot_global_dict(),
+ find_blocked_global_peers1(Snapshot2, Snapshot1).
+
+%% Extract the {{sync_tag_his, Node}, _} entries from the global name
+%% server's process dictionary, as exposed by sys:get_status/1.
+snapshot_global_dict() ->
+ {status, _, _, [PDict | _]} = sys:get_status(global_name_server),
+ lists:filter(fun ({{sync_tag_his, _}, _}) -> true;
+ (_) -> false
+ end, PDict).
+
+%% An entry present in both the newer and the older snapshot has been
+%% stuck for the whole sampling interval: unblock each such peer, in
+%% snapshot order.
+find_blocked_global_peers1(NewerSnapshot, OlderSnapshot) ->
+ Stuck = [Peer || {{sync_tag_his, Peer}, _} = Entry <- NewerSnapshot,
+ lists:member(Entry, OlderSnapshot)],
+ lists:foreach(fun unblock_global_peer/1, Stuck),
+ ok.
+
+%% Kick both "global" instances (ours and PeerNode's) out of a stuck
+%% sync by injecting fake nodedown followed by nodeup events, which
+%% makes them restart their synchronisation.
+unblock_global_peer(PeerNode) ->
+ ThisNode = node(),
+ PeerState = rpc:call(PeerNode, sys, get_status, [global_name_server]),
+ error_logger:info_msg(
+ "Global hang workaround: global state on ~s seems broken~n"
+ " * Peer global state: ~p~n"
+ " * Local global state: ~p~n"
+ "Faking nodedown/nodeup between ~s and ~s~n",
+ [PeerNode, PeerState, sys:get_status(global_name_server),
+ PeerNode, ThisNode]),
+ %% Order matters: both sides must see the nodedown before the nodeup.
+ {global_name_server, ThisNode} ! {nodedown, PeerNode},
+ {global_name_server, PeerNode} ! {nodedown, ThisNode},
+ {global_name_server, ThisNode} ! {nodeup, PeerNode},
+ {global_name_server, PeerNode} ! {nodeup, ThisNode},
+ ok.
+
+%%----------------------------------------------------------------------------
+%% gen_server callbacks
+%%----------------------------------------------------------------------------
+
+%% gen_server callback: subscribe to net_kernel and Mnesia system
+%% events and start monitoring all rabbit nodes believed to be running.
+init([]) ->
+ %% We trap exits so that the supervisor will not just kill us. We
+ %% want to be sure that we are not going to be killed while
+ %% writing out the cluster status files - bad things can then
+ %% happen.
+ process_flag(trap_exit, true),
+ net_kernel:monitor_nodes(true, [nodedown_reason]),
+ {ok, _} = mnesia:subscribe(system),
+ %% If the node has been restarted, Mnesia can trigger a system notification
+ %% before the monitor subscribes to receive them. To avoid autoheal blocking due to
+ %% the inconsistent database event never arriving, we begin monitoring all running
+ %% nodes as early as possible. The rest of the monitoring ops will only be triggered
+ %% when notifications arrive.
+ Nodes = possibly_partitioned_nodes(),
+ startup_log(Nodes),
+ Monitors = lists:foldl(fun(Node, Monitors0) ->
+ pmon:monitor({rabbit, Node}, Monitors0)
+ end, pmon:new(), Nodes),
+ {ok, ensure_keepalive_timer(#state{monitors = Monitors,
+ subscribers = pmon:new(),
+ partitions = [],
+ guid = rabbit_guid:gen(),
+ node_guids = maps:new(),
+ autoheal = rabbit_autoheal:init()})}.
+
+%% Return the list of partitioned nodes recorded so far.
+handle_call(partitions, _From, State = #state{partitions = Partitions}) ->
+ {reply, Partitions, State};
+
+%% Status summary: recorded partitions plus currently connected nodes.
+handle_call(status, _From, State = #state{partitions = Partitions}) ->
+ {reply, [{partitions, Partitions},
+ {nodes, [node() | nodes()]}], State};
+
+%% NOTE(review): unknown calls are not replied to ({noreply, ...}), so
+%% such callers will block until their call timeout fires.
+handle_call(_Request, _From, State) ->
+ {noreply, State}.
+
+%% Broadcast our own 'node_up' (including our GUID) to all other
+%% running rabbits, and register each of them locally in return.
+handle_cast(notify_node_up, State = #state{guid = GUID}) ->
+ Nodes = rabbit_nodes:all_running() -- [node()],
+ gen_server:abcast(Nodes, ?SERVER,
+ {node_up, node(), rabbit_mnesia:node_type(), GUID}),
+ %% register other active rabbits with this rabbit
+ DiskNodes = rabbit_mnesia:cluster_nodes(disc),
+ [gen_server:cast(?SERVER, {node_up, N, case lists:member(N, DiskNodes) of
+ true -> disc;
+ false -> ram
+ end}) || N <- Nodes],
+ {noreply, State};
+
+%%----------------------------------------------------------------------------
+%% Partial partition detection
+%%
+%% Every node generates a GUID each time it starts, and announces that
+%% GUID in 'node_up', with 'announce_guid' sent by return so the new
+%% node knows the GUIDs of the others. These GUIDs are sent in all the
+%% partial partition related messages to ensure that we ignore partial
+%% partition messages from before we restarted (to avoid getting stuck
+%% in a loop).
+%%
+%% When one node gets nodedown from another, it then sends
+%% 'check_partial_partition' to all the nodes it still thinks are
+%% alive. If any of those (intermediate) nodes still see the "down"
+%% node as up, they inform it that this has happened. The original
+%% node (in 'ignore', 'pause_if_all_down' or 'autoheal' mode) will then
+%% disconnect from the intermediate node to "upgrade" to a full
+%% partition.
+%%
+%% In pause_minority mode it will instead immediately pause until all
+%% nodes come back. This is because the contract for pause_minority is
+%% that nodes should never sit in a partitioned state - if it just
+%% disconnected, it would become a minority, pause, realise it's not
+%% in a minority any more, and come back, still partitioned (albeit no
+%% longer partially).
+%% ----------------------------------------------------------------------------
+
+%% A remote node announced itself with its GUID: record the GUID,
+%% acknowledge with our own, then fall through to the plain
+%% {node_up, Node, NodeType} clause below.
+handle_cast({node_up, Node, NodeType, GUID},
+ State = #state{guid = MyGUID,
+ node_guids = GUIDs}) ->
+ cast(Node, {announce_guid, node(), MyGUID}),
+ GUIDs1 = maps:put(Node, GUID, GUIDs),
+ handle_cast({node_up, Node, NodeType}, State#state{node_guids = GUIDs1});
+
+%% Reply to our own 'node_up': remember the sender's GUID.
+handle_cast({announce_guid, Node, GUID}, State = #state{node_guids = GUIDs}) ->
+ {noreply, State#state{node_guids = maps:put(Node, GUID, GUIDs)}};
+
+%% A peer (Rep) saw Node go down; if we still see the same incarnation
+%% of Node (GUID matches) tell Rep it may be partially partitioned.
+%% The head binds MyGUID twice, so stale messages (from before our
+%% restart) do not match this clause.
+handle_cast({check_partial_partition, Node, Rep, NodeGUID, MyGUID, RepGUID},
+ State = #state{guid = MyGUID,
+ node_guids = GUIDs}) ->
+ case lists:member(Node, rabbit_nodes:all_running()) andalso
+ maps:find(Node, GUIDs) =:= {ok, NodeGUID} of
+ true -> spawn_link( %%[1]
+ fun () ->
+ case rpc:call(Node, rabbit, is_running, []) of
+ {badrpc, _} -> ok;
+ _ ->
+ rabbit_log:warning("Received a 'DOWN' message"
+ " from ~p but still can"
+ " communicate with it ~n",
+ [Node]),
+ cast(Rep, {partial_partition,
+ Node, node(), RepGUID})
+ end
+ end);
+ false -> ok
+ end,
+ {noreply, State};
+%% [1] We checked that we haven't heard the node go down - but we
+%% really should make sure we can actually communicate with
+%% it. Otherwise there's a race where we falsely detect a partial
+%% partition.
+%%
+%% Now of course the rpc:call/4 may take a long time to return if
+%% connectivity with the node is actually interrupted - but that's OK,
+%% we only really want to do something in a timely manner if
+%% connectivity is OK. However, of course as always we must not block
+%% the node monitor, so we do the check in a separate process.
+
+%% Stale check_partial_partition (GUID mismatch): ignore it.
+handle_cast({check_partial_partition, _Node, _Reporter,
+ _NodeGUID, _GUID, _ReporterGUID}, State) ->
+ {noreply, State};
+
+%% A peer told us that a node we saw as DOWN is still visible to it:
+%% react according to the configured partition-handling mode. Only
+%% handled when the embedded GUID matches our current one.
+handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID},
+ State = #state{guid = MyGUID}) ->
+ FmtBase = "Partial partition detected:~n"
+ " * We saw DOWN from ~s~n"
+ " * We can still see ~s which can see ~s~n",
+ ArgsBase = [NotReallyDown, Proxy, NotReallyDown],
+ case application:get_env(rabbit, cluster_partition_handling) of
+ {ok, pause_minority} ->
+ rabbit_log:error(
+ FmtBase ++ " * pause_minority mode enabled~n"
+ "We will therefore pause until the *entire* cluster recovers~n",
+ ArgsBase),
+ await_cluster_recovery(fun all_nodes_up/0),
+ {noreply, State};
+ {ok, {pause_if_all_down, PreferredNodes, _}} ->
+ case in_preferred_partition(PreferredNodes) of
+ true -> rabbit_log:error(
+ FmtBase ++ "We will therefore intentionally "
+ "disconnect from ~s~n", ArgsBase ++ [Proxy]),
+ upgrade_to_full_partition(Proxy);
+ false -> rabbit_log:info(
+ FmtBase ++ "We are about to pause, no need "
+ "for further actions~n", ArgsBase)
+ end,
+ {noreply, State};
+ {ok, _} ->
+ %% Any other mode: upgrade to a full partition.
+ rabbit_log:error(
+ FmtBase ++ "We will therefore intentionally disconnect from ~s~n",
+ ArgsBase ++ [Proxy]),
+ upgrade_to_full_partition(Proxy),
+ {noreply, State}
+ end;
+
+%% Stale partial_partition report (GUID mismatch): ignore it.
+handle_cast({partial_partition, _GUID, _Reporter, _Proxy}, State) ->
+ {noreply, State};
+
+%% Sometimes it appears the Erlang VM does not give us nodedown
+%% messages reliably when another node disconnects from us. Therefore
+%% we are told just before the disconnection so we can reciprocate.
+handle_cast({partial_partition_disconnect, Other}, State) ->
+ rabbit_log:error("Partial partition disconnect from ~s~n", [Other]),
+ disconnect(Other),
+ {noreply, State};
+
+%% Note: when updating the status file, we can't simply write the
+%% mnesia information since the message can (and will) overtake the
+%% mnesia propagation.
+handle_cast({node_up, Node, NodeType},
+ State = #state{monitors = Monitors}) ->
+ rabbit_log:info("rabbit on node ~p up~n", [Node]),
+ {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
+ write_cluster_status({add_node(Node, AllNodes),
+ case NodeType of
+ disc -> add_node(Node, DiscNodes);
+ ram -> DiscNodes
+ end,
+ add_node(Node, RunningNodes)}),
+ ok = handle_live_rabbit(Node),
+ %% Only add a monitor if we do not already hold one for this node.
+ Monitors1 = case pmon:is_monitored({rabbit, Node}, Monitors) of
+ true ->
+ Monitors;
+ false ->
+ pmon:monitor({rabbit, Node}, Monitors)
+ end,
+ {noreply, maybe_autoheal(State#state{monitors = Monitors1})};
+
+%% A node joined the cluster: record it in the status file (it is not
+%% necessarily running yet, so RunningNodes is unchanged).
+handle_cast({joined_cluster, Node, NodeType}, State) ->
+ {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
+ write_cluster_status({add_node(Node, AllNodes),
+ case NodeType of
+ disc -> add_node(Node, DiscNodes);
+ ram -> DiscNodes
+ end,
+ RunningNodes}),
+ {noreply, State};
+
+%% A node left the cluster: remove it from all three status lists.
+handle_cast({left_cluster, Node}, State) ->
+ {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
+ write_cluster_status({del_node(Node, AllNodes), del_node(Node, DiscNodes),
+ del_node(Node, RunningNodes)}),
+ {noreply, State};
+
+%% Register a process interested in node_down notifications.
+handle_cast({subscribe, Pid}, State = #state{subscribers = Subscribers}) ->
+ {noreply, State#state{subscribers = pmon:monitor(Pid, Subscribers)}};
+
+%% Keepalives carry no payload; receiving one is all that matters.
+handle_cast(keepalive, State) ->
+ {noreply, State};
+
+handle_cast(_Msg, State) ->
+ {noreply, State}.
+
+%% The rabbit application on Node stopped: update the status file,
+%% notify subscribers, and run the dead-rabbit handling.
+handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason},
+ State = #state{monitors = Monitors, subscribers = Subscribers}) ->
+ rabbit_log:info("rabbit on node ~p down~n", [Node]),
+ {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
+ write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}),
+ [P ! {node_down, Node} || P <- pmon:monitored(Subscribers)],
+ {noreply, handle_dead_rabbit(
+ Node,
+ State#state{monitors = pmon:erase({rabbit, Node}, Monitors)})};
+
+%% A subscriber died: forget it.
+handle_info({'DOWN', _MRef, process, Pid, _Reason},
+ State = #state{subscribers = Subscribers}) ->
+ {noreply, State#state{subscribers = pmon:erase(Pid, Subscribers)}};
+
+%% A node disconnected: ask every other node whose GUID we know to
+%% check whether it can still see the "down" node (partial partition
+%% detection), then run the dead-node handling.
+handle_info({nodedown, Node, Info}, State = #state{guid = MyGUID,
+ node_guids = GUIDs}) ->
+ rabbit_log:info("node ~p down: ~p~n",
+ [Node, proplists:get_value(nodedown_reason, Info)]),
+ Check = fun (N, CheckGUID, DownGUID) ->
+ cast(N, {check_partial_partition,
+ Node, node(), DownGUID, CheckGUID, MyGUID})
+ end,
+ case maps:find(Node, GUIDs) of
+ {ok, DownGUID} -> Alive = rabbit_nodes:all_running()
+ -- [node(), Node],
+ [case maps:find(N, GUIDs) of
+ {ok, CheckGUID} -> Check(N, CheckGUID, DownGUID);
+ error -> ok
+ end || N <- Alive];
+ error -> ok
+ end,
+ {noreply, handle_dead_node(Node, State)};
+
+handle_info({nodeup, Node, _Info}, State) ->
+ rabbit_log:info("node ~p up~n", [Node]),
+ {noreply, State};
+
+%% Mnesia detected a partition with Node: treat the node as live,
+%% record the partition, and maybe kick off autoheal.
+handle_info({mnesia_system_event,
+ {inconsistent_database, running_partitioned_network, Node}},
+ State = #state{partitions = Partitions,
+ monitors = Monitors}) ->
+ %% We will not get a node_up from this node - yet we should treat it as
+ %% up (mostly).
+ State1 = case pmon:is_monitored({rabbit, Node}, Monitors) of
+ true -> State;
+ false -> State#state{
+ monitors = pmon:monitor({rabbit, Node}, Monitors)}
+ end,
+ ok = handle_live_rabbit(Node),
+ Partitions1 = lists:usort([Node | Partitions]),
+ {noreply, maybe_autoheal(State1#state{partitions = Partitions1})};
+
+%% Forward autoheal protocol messages to the autoheal state machine.
+handle_info({autoheal_msg, Msg}, State = #state{autoheal = AState,
+ partitions = Partitions}) ->
+ AState1 = rabbit_autoheal:handle_msg(Msg, AState, Partitions),
+ {noreply, State#state{autoheal = AState1}};
+
+handle_info(ping_down_nodes, State) ->
+ %% We ping nodes when some are down to ensure that we find out
+ %% about healed partitions quickly. We ping all nodes rather than
+ %% just the ones we know are down for simplicity; it's not expensive
+ %% to ping the nodes that are up, after all.
+ State1 = State#state{down_ping_timer = undefined},
+ Self = self(),
+ %% We ping in a separate process since in a partition it might
+ %% take some noticeable length of time and we don't want to block
+ %% the node monitor for that long.
+ spawn_link(fun () ->
+ ping_all(),
+ case all_nodes_up() of
+ true -> ok;
+ false -> Self ! ping_down_nodes_again
+ end
+ end),
+ {noreply, State1};
+
+%% Some node is still down: re-arm the down-ping timer.
+handle_info(ping_down_nodes_again, State) ->
+ {noreply, ensure_ping_timer(State)};
+
+handle_info(ping_up_nodes, State) ->
+ %% In this case we need to ensure that we ping "quickly" -
+ %% i.e. only nodes that we know to be up.
+ [cast(N, keepalive) || N <- alive_nodes() -- [node()]],
+ {noreply, ensure_keepalive_timer(State#state{keepalive_timer = undefined})};
+
+%% Exits from linked helpers (we trap exits in init/1) are passed to
+%% the autoheal state machine.
+handle_info({'EXIT', _, _} = Info, State = #state{autoheal = AState0}) ->
+ AState = rabbit_autoheal:process_down(Info, AState0),
+ {noreply, State#state{autoheal = AState}};
+
+handle_info(_Info, State) ->
+ {noreply, State}.
+
+%% Cancel the down-ping timer on shutdown.
+terminate(_Reason, State) ->
+ rabbit_misc:stop_timer(State, #state.down_ping_timer),
+ ok.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%%----------------------------------------------------------------------------
+%% Functions that call the module specific hooks when nodes go up/down
+%%----------------------------------------------------------------------------
+
+%% React to a node (not just its rabbit application) going down,
+%% according to the configured cluster_partition_handling mode.
+handle_dead_node(Node, State = #state{autoheal = Autoheal}) ->
+ %% In general in rabbit_node_monitor we care about whether the
+ %% rabbit application is up rather than the node; we do this so
+ %% that we can respond in the same way to "rabbitmqctl stop_app"
+ %% and "rabbitmqctl stop" as much as possible.
+ %%
+ %% However, for pause_minority and pause_if_all_down modes we can't do
+ %% this, since we depend on looking at whether other nodes are up
+ %% to decide whether to come back up ourselves - if we decide that
+ %% based on the rabbit application we would go down and never come
+ %% back.
+ case application:get_env(rabbit, cluster_partition_handling) of
+ {ok, pause_minority} ->
+ case majority([Node]) of
+ true -> ok;
+ false -> await_cluster_recovery(fun majority/0)
+ end,
+ State;
+ {ok, {pause_if_all_down, PreferredNodes, HowToRecover}} ->
+ case in_preferred_partition(PreferredNodes, [Node]) of
+ true -> ok;
+ false -> await_cluster_recovery(
+ fun in_preferred_partition/0)
+ end,
+ case HowToRecover of
+ autoheal -> State#state{autoheal =
+ rabbit_autoheal:node_down(Node, Autoheal)};
+ _ -> State
+ end;
+ {ok, ignore} ->
+ State;
+ {ok, autoheal} ->
+ State#state{autoheal = rabbit_autoheal:node_down(Node, Autoheal)};
+ {ok, Term} ->
+ %% Unknown setting: warn and behave as 'ignore'.
+ rabbit_log:warning("cluster_partition_handling ~p unrecognised, "
+ "assuming 'ignore'~n", [Term]),
+ State
+ end.
+
+%% Stop the rabbit application (outside the application hierarchy) and
+%% restart it once Condition/0 holds again.
+await_cluster_recovery(Condition) ->
+ rabbit_log:warning("Cluster minority/secondary status detected - "
+ "awaiting recovery~n", []),
+ run_outside_applications(fun () ->
+ rabbit:stop(),
+ wait_for_cluster_recovery(Condition)
+ end, false),
+ ok.
+
+%% Spawn a process, detached from the application hierarchy, to run
+%% Fun - e.g. stopping and restarting the rabbit application itself.
+run_outside_applications(Fun, WaitForExistingProcess) ->
+ spawn_link(fun () ->
+ %% Ignore exit messages from the monitor - the link is needed
+ %% to ensure the monitor detects abnormal exits from this process
+ %% and can reset the 'restarting' status on the autoheal, avoiding
+ %% a deadlock. The monitor is restarted when rabbit does, so messages
+ %% in the other direction should be ignored.
+ process_flag(trap_exit, true),
+ %% If our group leader is inside an application we are about
+ %% to stop, application:stop/1 does not return.
+ group_leader(whereis(init), self()),
+ register_outside_app_process(Fun, WaitForExistingProcess)
+ end).
+
+%% Run Fun under the registered name 'rabbit_outside_app_process'.
+register_outside_app_process(Fun, WaitForExistingProcess) ->
+ %% Ensure only one such process at a time, the exit(badarg) is
+ %% harmless if one is already running.
+ %%
+ %% If WaitForExistingProcess is false, the given fun is simply not
+ %% executed at all and the process exits.
+ %%
+ %% If WaitForExistingProcess is true, we wait for the end of the
+ %% currently running process before executing the given function.
+ try register(rabbit_outside_app_process, self()) of
+ true ->
+ do_run_outside_app_fun(Fun)
+ catch
+ error:badarg when WaitForExistingProcess ->
+ MRef = erlang:monitor(process, rabbit_outside_app_process),
+ receive
+ {'DOWN', MRef, _, _, _} ->
+ %% The existing process exited, let's try to
+ %% register again.
+ register_outside_app_process(Fun, WaitForExistingProcess)
+ end;
+ error:badarg ->
+ ok
+ end.
+
+%% Run Fun, logging (rather than propagating) any exception so the
+%% outside-app process does not crash its linked monitor.
+do_run_outside_app_fun(Fun) ->
+ try
+ Fun()
+ catch _:E:Stacktrace ->
+ rabbit_log:error(
+ "rabbit_outside_app_process:~n~p~n~p~n",
+ [E, Stacktrace])
+ end.
+
+%% Poll (pinging all cluster nodes each round) until Condition/0
+%% holds, then restart the rabbit application.
+wait_for_cluster_recovery(Condition) ->
+ ping_all(),
+ case Condition() of
+ true -> rabbit:start();
+ false -> timer:sleep(?RABBIT_DOWN_PING_INTERVAL),
+ wait_for_cluster_recovery(Condition)
+ end.
+
+%% Run the per-subsystem down hooks for Node and reconsider the
+%% recorded partitions.
+handle_dead_rabbit(Node, State = #state{partitions = Partitions,
+ autoheal = Autoheal}) ->
+ %% TODO: This may turn out to be a performance hog when there are
+ %% lots of nodes. We really only need to execute some of these
+ %% statements on *one* node, rather than all of them.
+ ok = rabbit_networking:on_node_down(Node),
+ ok = rabbit_amqqueue:on_node_down(Node),
+ ok = rabbit_alarm:on_node_down(Node),
+ ok = rabbit_mnesia:on_node_down(Node),
+ %% If we have been partitioned, and we are now in the only remaining
+ %% partition, we no longer care about partitions - forget them. Note
+ %% that we do not attempt to deal with individual (other) partitions
+ %% going away. It's only safe to forget anything about partitions when
+ %% there are no partitions.
+ Down = Partitions -- alive_rabbit_nodes(),
+ NoLongerPartitioned = rabbit_nodes:all_running(),
+ Partitions1 = case Partitions -- Down -- NoLongerPartitioned of
+ [] -> [];
+ _ -> Partitions
+ end,
+ ensure_ping_timer(
+ State#state{partitions = Partitions1,
+ autoheal = rabbit_autoheal:rabbit_down(Node, Autoheal)}).
+
+%% (Re)arm the timer that pings all nodes while some are down.
+ensure_ping_timer(State) ->
+ rabbit_misc:ensure_timer(
+ State, #state.down_ping_timer, ?RABBIT_DOWN_PING_INTERVAL,
+ ping_down_nodes).
+
+%% (Re)arm the periodic keepalive timer; interval comes from the
+%% 'cluster_keepalive_interval' application env.
+ensure_keepalive_timer(State) ->
+ {ok, Interval} = application:get_env(rabbit, cluster_keepalive_interval),
+ rabbit_misc:ensure_timer(
+ State, #state.keepalive_timer, Interval, ping_up_nodes).
+
+%% Run the per-subsystem up hooks for Node.
+handle_live_rabbit(Node) ->
+ ok = rabbit_amqqueue:on_node_up(Node),
+ ok = rabbit_alarm:on_node_up(Node),
+ ok = rabbit_mnesia:on_node_up(Node).
+
+%% Start autoheal only when partitions have been recorded and every
+%% cluster node is reachable again.
+maybe_autoheal(State = #state{partitions = []}) ->
+ State;
+
+maybe_autoheal(State = #state{autoheal = AState}) ->
+ case all_nodes_up() of
+ true -> State#state{autoheal = rabbit_autoheal:maybe_start(AState)};
+ false -> State
+ end.
+
+%%--------------------------------------------------------------------
+%% Internal utils
+%%--------------------------------------------------------------------
+
+%% Read an Erlang term from FileName. A missing file is reported as
+%% {error, enoent}; any other read error aborts via throw.
+try_read_file(FileName) ->
+ Result = rabbit_file:read_term_file(FileName),
+ case Result of
+ {ok, _Term} = Ok -> Ok;
+ {error, enoent} = Enoent -> Enoent;
+ {error, Reason} -> throw({error, {cannot_read_file, FileName, Reason}})
+ end.
+
+%% Union of the given nodes and Mnesia's db_nodes, sorted and deduped.
+legacy_cluster_nodes(Nodes) ->
+ %% We get all the info that we can, including the nodes from
+ %% mnesia, which will be there if the node is a disc node (empty
+ %% list otherwise)
+ lists:usort(Nodes ++ mnesia:system_info(db_nodes)).
+
+%% Infer the disc-node list from a legacy all-nodes list: an empty
+%% list, or one containing this node, implies this node is a disc
+%% node; otherwise there are no known disc nodes.
+legacy_disc_nodes([]) ->
+ [node()];
+legacy_disc_nodes(AllNodes) ->
+ case lists:member(node(), AllNodes) of
+ true -> [node()];
+ false -> []
+ end.
+
+%% Insert Node into a node list, keeping it sorted and duplicate-free.
+add_node(Node, Nodes) ->
+ lists:usort([Node | Nodes]).
+
+%% Remove one occurrence of Node from the list (same as Nodes -- [Node]).
+del_node(Node, Nodes) ->
+ lists:delete(Node, Nodes).
+
+%% Cast Msg to the node monitor running on Node.
+cast(Node, Msg) -> gen_server:cast({?SERVER, Node}, Msg).
+
+%% Tell Proxy we are about to disconnect (it may not receive a
+%% nodedown otherwise - see the partial_partition_disconnect clause),
+%% then sever the connection ourselves.
+upgrade_to_full_partition(Proxy) ->
+ cast(Proxy, {partial_partition_disconnect, node()}),
+ disconnect(Proxy).
+
+%% When we call this, it's because we want to force Mnesia to detect a
+%% partition. But if we just disconnect_node/1 then Mnesia won't
+%% detect a very short partition. So we want to force a slightly
+%% longer disconnect. Unfortunately we don't have a way to blacklist
+%% individual nodes; the best we can do is turn off auto-connect
+%% altogether.
+%% Disconnect from Node and keep it disconnected for ~1s. NOTE: this
+%% disables auto-connect for ALL nodes during that second, as there is
+%% no per-node blacklist.
+disconnect(Node) ->
+ application:set_env(kernel, dist_auto_connect, never),
+ erlang:disconnect_node(Node),
+ timer:sleep(1000),
+ application:unset_env(kernel, dist_auto_connect),
+ ok.
+
+%%--------------------------------------------------------------------
+
+%% mnesia:system_info(db_nodes) (and hence
+%% rabbit_nodes:all_running()) does not return all nodes
+%% when partitioned, just those that we are sharing Mnesia state
+%% with. So we have a small set of replacement functions
+%% here. "rabbit" in a function's name implies we test if the rabbit
+%% application is up, not just the node.
+
+%% As we use these functions to decide what to do in pause_minority or
+%% pause_if_all_down states, they *must* be fast, even in the case where
+%% TCP connections are timing out. So that means we should be careful
+%% about whether we connect to nodes which are currently disconnected.
+
+%% True if strictly more than half of all cluster nodes are connected.
+majority() ->
+ majority([]).
+
+%% As majority/0, but additionally treating NodesDown as down.
+majority(NodesDown) ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ AliveNodes = alive_nodes(Nodes) -- NodesDown,
+ length(AliveNodes) / length(Nodes) > 0.5.
+
+%% Variant used as a recovery condition: reads the preferred nodes
+%% from the (pause_if_all_down) configuration.
+in_preferred_partition() ->
+ {ok, {pause_if_all_down, PreferredNodes, _}} =
+ application:get_env(rabbit, cluster_partition_handling),
+ in_preferred_partition(PreferredNodes).
+
+in_preferred_partition(PreferredNodes) ->
+ in_preferred_partition(PreferredNodes, []).
+
+%% True if at least one preferred node (restricted to nodes actually
+%% in the cluster) is alive and not in NodesDown. Vacuously true when
+%% no preferred node is part of the cluster.
+in_preferred_partition(PreferredNodes, NodesDown) ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ RealPreferredNodes = [N || N <- PreferredNodes, lists:member(N, Nodes)],
+ AliveNodes = alive_nodes(RealPreferredNodes) -- NodesDown,
+ RealPreferredNodes =:= [] orelse AliveNodes =/= [].
+
+%% True if every cluster node is connected.
+all_nodes_up() ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ length(alive_nodes(Nodes)) =:= length(Nodes).
+
+-spec all_rabbit_nodes_up() -> boolean().
+
+%% True if the rabbit application is running on every cluster node.
+all_rabbit_nodes_up() ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ length(alive_rabbit_nodes(Nodes)) =:= length(Nodes).
+
+-spec alive_nodes([node()]) -> [node()].
+
+%% Nodes from the given list that are currently connected (or are this
+%% node). Does NOT attempt to connect - see the comment above.
+alive_nodes() -> alive_nodes(rabbit_mnesia:cluster_nodes(all)).
+alive_nodes(Nodes) -> [N || N <- Nodes, lists:member(N, [node()|nodes()])].
+
+-spec alive_rabbit_nodes([node()]) -> [node()].
+
+alive_rabbit_nodes() -> alive_rabbit_nodes(rabbit_mnesia:cluster_nodes(all)).
+
+%% As alive_nodes/1, further restricted to nodes where the rabbit
+%% application reports itself as running.
+alive_rabbit_nodes(Nodes) ->
+ [N || N <- alive_nodes(Nodes), rabbit:is_running(N)].
+
+%% This one is allowed to connect!
+
+-spec ping_all() -> 'ok'.
+
+%% net_adm:ping/1 every cluster node, establishing connections where
+%% possible.
+ping_all() ->
+ [net_adm:ping(N) || N <- rabbit_mnesia:cluster_nodes(all)],
+ ok.
+
+%% Nodes whose rabbit appears alive but which Mnesia does not consider
+%% running - i.e. candidates for being on the far side of a partition.
+possibly_partitioned_nodes() ->
+ alive_rabbit_nodes() -- rabbit_nodes:all_running().
+
+%% Log monitor startup, mentioning any possibly-partitioned nodes.
+startup_log([]) ->
+ rabbit_log:info("Starting rabbit_node_monitor~n", []);
+startup_log(Nodes) ->
+ rabbit_log:info("Starting rabbit_node_monitor, might be partitioned from ~p~n",
+ [Nodes]).
diff --git a/deps/rabbit/src/rabbit_nodes.erl b/deps/rabbit/src/rabbit_nodes.erl
new file mode 100644
index 0000000000..3034a4d513
--- /dev/null
+++ b/deps/rabbit/src/rabbit_nodes.erl
@@ -0,0 +1,157 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_nodes).
+
+-export([names/1, diagnostics/1, make/1, make/2, parts/1, cookie_hash/0,
+ is_running/2, is_process_running/2,
+ cluster_name/0, set_cluster_name/1, set_cluster_name/2, ensure_epmd/0,
+ all_running/0, name_type/0, running_count/0, total_count/0,
+ await_running_count/2, is_single_node_cluster/0,
+ boot/0]).
+-export([persistent_cluster_id/0, seed_internal_cluster_id/0, seed_user_provided_cluster_name/0]).
+
+-include_lib("kernel/include/inet.hrl").
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-define(SAMPLING_INTERVAL, 1000).
+
+-define(INTERNAL_CLUSTER_ID_PARAM_NAME, internal_cluster_id).
+
+%%----------------------------------------------------------------------------
+%% API
+%%----------------------------------------------------------------------------
+
+%% Boot step: ensure the internal cluster id and any configured
+%% cluster name are stored as global runtime parameters.
+boot() ->
+ seed_internal_cluster_id(),
+ seed_user_provided_cluster_name().
+
+%% Node name type (e.g. shortnames/longnames) as recorded in the
+%% prelaunch context.
+name_type() ->
+ #{nodename_type := NodeType} = rabbit_prelaunch:get_context(),
+ NodeType.
+
+%% The functions below are thin delegations to rabbit_nodes_common.
+-spec names(string()) ->
+ rabbit_types:ok_or_error2([{string(), integer()}], term()).
+
+names(Hostname) ->
+ rabbit_nodes_common:names(Hostname).
+
+-spec diagnostics([node()]) -> string().
+
+diagnostics(Nodes) ->
+ rabbit_nodes_common:diagnostics(Nodes).
+
+make(NameOrParts) ->
+ rabbit_nodes_common:make(NameOrParts).
+
+make(ShortName, Hostname) ->
+ make({ShortName, Hostname}).
+
+parts(NodeStr) ->
+ rabbit_nodes_common:parts(NodeStr).
+
+-spec cookie_hash() -> string().
+
+cookie_hash() ->
+ rabbit_nodes_common:cookie_hash().
+
+-spec is_running(node(), atom()) -> boolean().
+
+is_running(Node, Application) ->
+ rabbit_nodes_common:is_running(Node, Application).
+
+-spec is_process_running(node(), atom()) -> boolean().
+
+is_process_running(Node, Process) ->
+ rabbit_nodes_common:is_process_running(Node, Process).
+
+-spec cluster_name() -> binary().
+
+%% The cluster name: the 'cluster_name' global runtime parameter if
+%% set, otherwise a default derived from this node's name and FQDN.
+cluster_name() ->
+ rabbit_runtime_parameters:value_global(
+ cluster_name, cluster_name_default()).
+
+%% Default cluster name: this node's short name re-qualified with the
+%% machine's FQDN, as a binary.
+cluster_name_default() ->
+ {ID, _} = parts(node()),
+ FQDN = rabbit_net:hostname(),
+ list_to_binary(atom_to_list(make({ID, FQDN}))).
+
+-spec persistent_cluster_id() -> binary().
+%% The internal cluster id, seeding it first if it has not been stored
+%% yet. Note the retry via recursion after seeding; seeding itself
+%% returns the id, so this recurses at most once in practice.
+persistent_cluster_id() ->
+ case rabbit_runtime_parameters:lookup_global(?INTERNAL_CLUSTER_ID_PARAM_NAME) of
+ not_found ->
+ seed_internal_cluster_id(),
+ persistent_cluster_id();
+ Param ->
+ #{value := Val, name := ?INTERNAL_CLUSTER_ID_PARAM_NAME} = maps:from_list(Param),
+ Val
+ end.
+
+-spec seed_internal_cluster_id() -> binary().
+%% Generate and store the internal cluster id if absent; in either
+%% case return the (new or existing) id.
+seed_internal_cluster_id() ->
+ case rabbit_runtime_parameters:lookup_global(?INTERNAL_CLUSTER_ID_PARAM_NAME) of
+ not_found ->
+ Id = rabbit_guid:binary(rabbit_guid:gen(), "rabbitmq-cluster-id"),
+ rabbit_log:info("Initialising internal cluster ID to '~s'", [Id]),
+ rabbit_runtime_parameters:set_global(?INTERNAL_CLUSTER_ID_PARAM_NAME, Id, ?INTERNAL_USER),
+ Id;
+ Param ->
+ #{value := Val, name := ?INTERNAL_CLUSTER_ID_PARAM_NAME} = maps:from_list(Param),
+ Val
+ end.
+
+%% If a cluster name was configured via the 'cluster_name' application
+%% env, store it as the global runtime parameter; otherwise do nothing.
+seed_user_provided_cluster_name() ->
+ case application:get_env(rabbit, cluster_name) of
+ undefined -> ok;
+ {ok, Name} ->
+ rabbit_log:info("Setting cluster name to '~s' as configured", [Name]),
+ set_cluster_name(rabbit_data_coercion:to_binary(Name))
+ end.
+
+-spec set_cluster_name(binary()) -> 'ok'.
+
+%% Set the cluster name on behalf of the internal user.
+set_cluster_name(Name) ->
+ set_cluster_name(Name, ?INTERNAL_USER).
+
+-spec set_cluster_name(binary(), rabbit_types:username()) -> 'ok'.
+
+%% Store the cluster name as a global runtime parameter, attributed to
+%% Username.
+set_cluster_name(Name, Username) ->
+ %% Cluster name should be binary
+ BinaryName = rabbit_data_coercion:to_binary(Name),
+ rabbit_runtime_parameters:set_global(cluster_name, BinaryName, Username).
+
+ensure_epmd() ->
+ rabbit_nodes_common:ensure_epmd().
+
+-spec all_running() -> [node()].
+%% Cluster nodes Mnesia considers running.
+all_running() -> rabbit_mnesia:cluster_nodes(running).
+
+-spec running_count() -> integer().
+running_count() -> length(all_running()).
+
+-spec total_count() -> integer().
+%% Number of nodes in the cluster, running or not.
+total_count() -> length(rabbit_mnesia:cluster_nodes(all)).
+
+-spec is_single_node_cluster() -> boolean().
+is_single_node_cluster() ->
+ total_count() =:= 1.
+
+-spec await_running_count(integer(), integer()) -> 'ok' | {'error', atom()}.
+%% Wait (polling every ?SAMPLING_INTERVAL ms) until at least
+%% TargetCount cluster nodes are running, or Timeout ms have elapsed.
+%% NOTE(review): round(Timeout/?SAMPLING_INTERVAL) is 0 for
+%% Timeout < 500ms, which yields an immediate {error, timeout} unless
+%% TargetCount is 1 - confirm that is intended.
+await_running_count(TargetCount, Timeout) ->
+ Retries = round(Timeout/?SAMPLING_INTERVAL),
+ await_running_count_with_retries(TargetCount, Retries).
+
+%% A target of 1 is trivially satisfied - presumably because the local
+%% node itself counts as running.
+await_running_count_with_retries(1, _Retries) -> ok;
+await_running_count_with_retries(_TargetCount, Retries) when Retries =:= 0 ->
+ {error, timeout};
+await_running_count_with_retries(TargetCount, Retries) ->
+ case running_count() >= TargetCount of
+ true -> ok;
+ false ->
+ timer:sleep(?SAMPLING_INTERVAL),
+ await_running_count_with_retries(TargetCount, Retries - 1)
+ end.
diff --git a/deps/rabbit/src/rabbit_osiris_metrics.erl b/deps/rabbit/src/rabbit_osiris_metrics.erl
new file mode 100644
index 0000000000..7b2574c7e1
--- /dev/null
+++ b/deps/rabbit/src/rabbit_osiris_metrics.erl
@@ -0,0 +1,103 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at https://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% Copyright (c) 2012-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_osiris_metrics).
+
+-behaviour(gen_server).
+
+-export([start_link/0]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+-define(TICK_TIMEOUT, 5000).
+-define(SERVER, ?MODULE).
+
+-define(STATISTICS_KEYS,
+ [policy,
+ operator_policy,
+ effective_policy_definition,
+ state,
+ leader,
+ online,
+ members
+ ]).
+
+-record(state, {timeout :: non_neg_integer()}).
+
+%%----------------------------------------------------------------------------
+%% Starts the raw metrics storage and owns the ETS tables.
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+%% Start the (locally registered) osiris metrics collector.
+start_link() ->
+ gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
+
+%% Schedule the first 'tick'; the interval comes from the
+%% 'stream_tick_interval' env, defaulting to ?TICK_TIMEOUT ms.
+init([]) ->
+ Timeout = application:get_env(rabbit, stream_tick_interval,
+ ?TICK_TIMEOUT),
+ erlang:send_after(Timeout, self(), tick),
+ {ok, #state{timeout = Timeout}}.
+
+%% NOTE(review): calls are not replied to ({noreply, ...}); any caller
+%% using gen_server:call/2,3 will time out. This server expects no
+%% calls or casts - it is driven purely by the 'tick' message.
+handle_call(_Request, _From, State) ->
+ {noreply, State}.
+
+handle_cast(_Request, State) ->
+ {noreply, State}.
+
+%% Periodic tick: publish queue metrics for every osiris writer found
+%% in osiris_counters:overview/0, then re-arm the timer.
+handle_info(tick, #state{timeout = Timeout} = State) ->
+ Data = osiris_counters:overview(),
+ maps:map(
+ fun ({osiris_writer, QName}, #{offset := Offs,
+ first_offset := FstOffs}) ->
+ %% Committed message count derived from the offset span.
+ COffs = Offs + 1 - FstOffs,
+ rabbit_core_metrics:queue_stats(QName, COffs, 0, COffs, 0),
+ Infos = try
+ %% TODO complete stats!
+ case rabbit_amqqueue:lookup(QName) of
+ {ok, Q} ->
+ rabbit_stream_queue:info(Q, ?STATISTICS_KEYS);
+ _ ->
+ []
+ end
+ catch
+ _:_ ->
+ %% It's possible that the writer has died but
+ %% it's still on the amqqueue record, so the
+ %% `erlang:process_info/2` calls will return
+ %% `undefined` and crash with a badmatch.
+ %% At least for now, skipping the metrics might
+ %% be the best option. Otherwise this brings
+ %% down `rabbit_sup` and the whole `rabbit` app.
+ []
+ end,
+ rabbit_core_metrics:queue_stats(QName, Infos),
+ rabbit_event:notify(queue_stats, Infos ++ [{name, QName},
+ {messages, COffs},
+ {messages_ready, COffs},
+ {messages_unacknowledged, 0}]),
+ ok;
+ (_, _V) ->
+ %% Not a writer entry: nothing to report.
+ ok
+ end, Data),
+ erlang:send_after(Timeout, self(), tick),
+ {noreply, State}.
+
+%% No resources to release; the tick timer dies with the process.
+terminate(_Reason, _State) ->
+ ok.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
diff --git a/deps/rabbit/src/rabbit_parameter_validation.erl b/deps/rabbit/src/rabbit_parameter_validation.erl
new file mode 100644
index 0000000000..66287ec799
--- /dev/null
+++ b/deps/rabbit/src/rabbit_parameter_validation.erl
@@ -0,0 +1,88 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_parameter_validation).
+
+-export([number/2, integer/2, binary/2, boolean/2, list/2, regex/2, proplist/3, enum/1]).
+
+%% Validator: accepts any Erlang number (integer or float); otherwise
+%% returns an {error, Fmt, Args} triple describing the offending value.
+number(_ParamName, Value) when is_number(Value) ->
+    ok;
+number(ParamName, Value) ->
+    {error, "~s should be a number, actually was ~p", [ParamName, Value]}.
+
+%% Validator: accepts only integers.
+integer(_Name, Term) when is_integer(Term) ->
+    ok;
+
+integer(Name, Term) ->
+    %% Fixed message: it previously said "should be a number" (a copy/paste
+    %% from number/2), which is misleading because a float IS a number yet
+    %% is rejected by this validator.
+    {error, "~s should be an integer, actually was ~p", [Name, Term]}.
+
+%% Validator: accepts any binary.
+binary(_Name, Term) when is_binary(Term) ->
+ ok;
+
+binary(Name, Term) ->
+ {error, "~s should be binary, actually was ~p", [Name, Term]}.
+
+%% Validator: accepts only the atoms 'true' and 'false'.
+boolean(_Name, Term) when is_boolean(Term) ->
+ ok;
+boolean(Name, Term) ->
+ {error, "~s should be boolean, actually was ~p", [Name, Term]}.
+
+%% Validator: accepts any list (contents are not inspected).
+list(_Name, Term) when is_list(Term) ->
+ ok;
+
+list(Name, Term) ->
+ {error, "~s should be list, actually was ~p", [Name, Term]}.
+
+%% Validator: accepts a binary that compiles as a regular expression
+%% (checked with re:compile/1; the compiled pattern is discarded).
+regex(Name, Term) when is_binary(Term) ->
+ case re:compile(Term) of
+ {ok, _} -> ok;
+ {error, Reason} -> {error, "~s should be regular expression "
+ "but is invalid: ~p", [Name, Reason]}
+ end;
+regex(Name, Term) ->
+ {error, "~s should be a binary but was ~p", [Name, Term]}.
+
+%% Validate a proplist (or map) against Constraints, a list of
+%% {Key, ValidatorFun, mandatory | optional} triples. Each matched key is
+%% removed from the working list; leftover keys produce an error entry.
+%% Returns the accumulated list of validator results / errors.
+proplist(Name, Constraints, Term) when is_list(Term) ->
+ {Results, Remainder}
+ = lists:foldl(
+ fun ({Key, Fun, Needed}, {Results0, Term0}) ->
+ case {lists:keytake(Key, 1, Term0), Needed} of
+ {{value, {Key, Value}, Term1}, _} ->
+ {[Fun(Key, Value) | Results0],
+ Term1};
+ {false, mandatory} ->
+ {[{error, "Key \"~s\" not found in ~s",
+ [Key, Name]} | Results0], Term0};
+ {false, optional} ->
+ {Results0, Term0}
+ end
+ end, {[], Term}, Constraints),
+ case Remainder of
+ [] -> Results;
+ _ -> [{error, "Unrecognised terms ~p in ~s", [Remainder, Name]}
+ | Results]
+ end;
+
+%% Maps are validated by converting to a proplist first.
+proplist(Name, Constraints, Term0) when is_map(Term0) ->
+ Term = maps:to_list(Term0),
+ proplist(Name, Constraints, Term);
+
+proplist(Name, _Constraints, Term) ->
+ {error, "~s not a list ~p", [Name, Term]}.
+
+%% Build a validator fun that accepts a binary equal to one of the given
+%% atoms' names. Options are rendered as binaries once, up front, so the
+%% per-call work is a plain lists:member/2.
+enum(OptionsA) ->
+    %% atom_to_binary/2 replaces list_to_binary(atom_to_list(O)): identical
+    %% output for ASCII atom names and, unlike list_to_binary/1 (which
+    %% crashes on code points > 255), well-defined for Unicode atoms.
+    Options = [atom_to_binary(O, utf8) || O <- OptionsA],
+    fun (Name, Term) when is_binary(Term) ->
+            case lists:member(Term, Options) of
+                true  -> ok;
+                false -> {error, "~s should be one of ~p, actually was ~p",
+                          [Name, Options, Term]}
+            end;
+        (Name, Term) ->
+            {error, "~s should be binary, actually was ~p", [Name, Term]}
+    end.
diff --git a/deps/rabbit/src/rabbit_password.erl b/deps/rabbit/src/rabbit_password.erl
new file mode 100644
index 0000000000..6a5254b707
--- /dev/null
+++ b/deps/rabbit/src/rabbit_password.erl
@@ -0,0 +1,52 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_password).
+-include("rabbit.hrl").
+
+-define(DEFAULT_HASHING_MODULE, rabbit_password_hashing_sha256).
+
+%%
+%% API
+%%
+
+-export([hash/1, hash/2, generate_salt/0, salted_hash/2, salted_hash/3,
+ hashing_mod/0, hashing_mod/1]).
+
+%% Hash Cleartext with the configured hashing module (see hashing_mod/0).
+hash(Cleartext) ->
+ hash(hashing_mod(), Cleartext).
+
+%% Hash Cleartext with a fresh salt using HashingMod. The result layout is
+%% the 4-byte salt (from generate_salt/0) followed by the salted hash.
+hash(HashingMod, Cleartext) ->
+ SaltBin = generate_salt(),
+ Hash = salted_hash(HashingMod, SaltBin, Cleartext),
+ <<SaltBin/binary, Hash/binary>>.
+
+%% Produce a fresh 4-byte salt. Uses a cryptographically strong source:
+%% the previous rand:uniform/1 draw comes from a plain, seedable PRNG
+%% whose output can be predictable, which is undesirable for password
+%% salts. The return type (a 4-byte binary) is unchanged.
+generate_salt() ->
+    crypto:strong_rand_bytes(4).
+
+%% Salted hash using the configured hashing module.
+salted_hash(Salt, Cleartext) ->
+ salted_hash(hashing_mod(), Salt, Cleartext).
+
+%% Apply Mod's hash/1 callback to the salt-prefixed cleartext.
+salted_hash(Mod, Salt, Cleartext) ->
+    Mod:hash(<<Salt/binary, Cleartext/binary>>).
+
+%% Configured hashing module ('password_hashing_module' app env),
+%% defaulting to SHA-256.
+hashing_mod() ->
+ rabbit_misc:get_env(rabbit, password_hashing_module,
+ ?DEFAULT_HASHING_MODULE).
+
+%% Map a stored/configured value to an effective hashing module.
+%% The previous explicit clauses for rabbit_password_hashing_sha256 and
+%% rabbit_password_hashing_md5 were redundant: the is_atom/1 catch-all
+%% already returns any module atom unchanged, so behaviour is identical.
+%%
+%% 'undefined' (user records created before 3.6.0) falls back to the
+%% hashing function used prior to 3.6.0, i.e. MD5.
+hashing_mod(undefined) ->
+    rabbit_password_hashing_md5;
+%% any configured custom module is used as-is
+hashing_mod(CustomMod) when is_atom(CustomMod) ->
+    CustomMod.
diff --git a/deps/rabbit/src/rabbit_password_hashing_md5.erl b/deps/rabbit/src/rabbit_password_hashing_md5.erl
new file mode 100644
index 0000000000..1e306673ca
--- /dev/null
+++ b/deps/rabbit/src/rabbit_password_hashing_md5.erl
@@ -0,0 +1,19 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Legacy hashing implementation, only used as a last resort when
+%% #internal_user.hashing_algorithm is md5 or undefined (the case in
+%% pre-3.6.0 user records).
+
+-module(rabbit_password_hashing_md5).
+
+-behaviour(rabbit_password_hashing).
+
+-export([hash/1]).
+
+%% MD5-digest the input (16-byte result). Retained only for backwards
+%% compatibility with pre-3.6.0 user records; MD5 is cryptographically weak.
+hash(Binary) ->
+ erlang:md5(Binary).
diff --git a/deps/rabbit/src/rabbit_password_hashing_sha256.erl b/deps/rabbit/src/rabbit_password_hashing_sha256.erl
new file mode 100644
index 0000000000..3ccc298efd
--- /dev/null
+++ b/deps/rabbit/src/rabbit_password_hashing_sha256.erl
@@ -0,0 +1,15 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_password_hashing_sha256).
+
+-behaviour(rabbit_password_hashing).
+
+-export([hash/1]).
+
+%% SHA-256 digest of the input (32-byte result); the default password
+%% hashing algorithm (see rabbit_password).
+hash(Binary) ->
+ crypto:hash(sha256, Binary).
diff --git a/deps/rabbit/src/rabbit_password_hashing_sha512.erl b/deps/rabbit/src/rabbit_password_hashing_sha512.erl
new file mode 100644
index 0000000000..c5edf8888a
--- /dev/null
+++ b/deps/rabbit/src/rabbit_password_hashing_sha512.erl
@@ -0,0 +1,15 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_password_hashing_sha512).
+
+-behaviour(rabbit_password_hashing).
+
+-export([hash/1]).
+
+%% SHA-512 digest of the input (64-byte result).
+hash(Binary) ->
+ crypto:hash(sha512, Binary).
diff --git a/deps/rabbit/src/rabbit_peer_discovery.erl b/deps/rabbit/src/rabbit_peer_discovery.erl
new file mode 100644
index 0000000000..1688579450
--- /dev/null
+++ b/deps/rabbit/src/rabbit_peer_discovery.erl
@@ -0,0 +1,326 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_peer_discovery).
+
+%%
+%% API
+%%
+
+-export([maybe_init/0, discover_cluster_nodes/0, backend/0, node_type/0,
+ normalize/1, format_discovered_nodes/1, log_configured_backend/0,
+ register/0, unregister/0, maybe_register/0, maybe_unregister/0,
+ maybe_inject_randomized_delay/0, lock/0, unlock/1,
+ discovery_retries/0]).
+-export([append_node_prefix/1, node_prefix/0, locking_retry_timeout/0,
+ lock_acquisition_failure_mode/0]).
+
+-define(DEFAULT_BACKEND, rabbit_peer_discovery_classic_config).
+
+%% what node type is used by default for this node when joining
+%% a new cluster as a virgin node
+-define(DEFAULT_NODE_TYPE, disc).
+
+%% default node prefix to attach to discovered hostnames
+-define(DEFAULT_PREFIX, "rabbit").
+
+%% default randomized delay range, in seconds
+-define(DEFAULT_STARTUP_RANDOMIZED_DELAY, {5, 60}).
+
+%% default discovery retries and interval.
+-define(DEFAULT_DISCOVERY_RETRY_COUNT, 10).
+-define(DEFAULT_DISCOVERY_RETRY_INTERVAL_MS, 500).
+
+-define(NODENAME_PART_SEPARATOR, "@").
+
+-spec backend() -> atom().
+
+%% Resolve the peer discovery backend module from
+%% rabbit.cluster_formation.peer_discovery_backend; defaults to the
+%% classic config backend when unset.
+backend() ->
+ case application:get_env(rabbit, cluster_formation) of
+ {ok, Proplist} ->
+ proplists:get_value(peer_discovery_backend, Proplist, ?DEFAULT_BACKEND);
+ undefined ->
+ ?DEFAULT_BACKEND
+ end.
+
+
+
+-spec node_type() -> rabbit_types:node_type().
+
+%% Node type this node uses when joining a cluster as a virgin node;
+%% defaults to ?DEFAULT_NODE_TYPE (disc).
+node_type() ->
+ case application:get_env(rabbit, cluster_formation) of
+ {ok, Proplist} ->
+ proplists:get_value(node_type, Proplist, ?DEFAULT_NODE_TYPE);
+ undefined ->
+ ?DEFAULT_NODE_TYPE
+ end.
+
+-spec locking_retry_timeout() -> {Retries :: integer(), Timeout :: integer()}.
+
+%% Lock acquisition retry policy: {attempt limit, per-attempt timeout in ms}.
+%% Defaults (10 attempts, 30s) apply both when cluster_formation is unset
+%% and when the individual keys are missing.
+locking_retry_timeout() ->
+    Proplist = case application:get_env(rabbit, cluster_formation) of
+                   {ok, Props} -> Props;
+                   undefined   -> []
+               end,
+    {proplists:get_value(lock_retry_limit, Proplist, 10),
+     proplists:get_value(lock_retry_timeout, Proplist, 30000)}.
+
+-spec lock_acquisition_failure_mode() -> ignore | fail.
+
+%% Whether startup continues ('ignore') or aborts ('fail', the default)
+%% when the peer discovery lock cannot be acquired.
+lock_acquisition_failure_mode() ->
+    Proplist = case application:get_env(rabbit, cluster_formation) of
+                   {ok, Props} -> Props;
+                   undefined   -> []
+               end,
+    proplists:get_value(lock_acquisition_failure_mode, Proplist, fail).
+
+-spec log_configured_backend() -> ok.
+
+%% Log which backend module will be used (informational, at startup).
+log_configured_backend() ->
+ rabbit_log:info("Configured peer discovery backend: ~s~n", [backend()]).
+
+%% Run the backend's optional init/0 callback if it exports one.
+%% Initialisation failures are logged but deliberately not fatal.
+maybe_init() ->
+ Backend = backend(),
+ code:ensure_loaded(Backend),
+ case erlang:function_exported(Backend, init, 0) of
+ true ->
+ rabbit_log:debug("Peer discovery backend supports initialisation"),
+ case Backend:init() of
+ ok ->
+ rabbit_log:debug("Peer discovery backend initialisation succeeded"),
+ ok;
+ {error, Error} ->
+ rabbit_log:warning("Peer discovery backend initialisation failed: ~p.", [Error]),
+ ok
+ end;
+ false ->
+ rabbit_log:debug("Peer discovery backend does not support initialisation"),
+ ok
+ end.
+
+
+%% This module doesn't currently sanity-check the return value of
+%% `Backend:list_nodes()`. Therefore, it could return something invalid:
+%% thus the `{ok, any()}` in the spec.
+%%
+%% `rabbit_mnesia:init_from_config()` does some verifications.
+
+-spec discover_cluster_nodes() ->
+ {ok, {Nodes :: [node()], NodeType :: rabbit_types:node_type()} | any()} |
+ {error, Reason :: string()}.
+
+%% Ask the configured backend for peers and normalize its return value
+%% into {ok, {Nodes, NodeType}} | {error, Reason}.
+discover_cluster_nodes() ->
+ Backend = backend(),
+ normalize(Backend:list_nodes()).
+
+
+-spec maybe_register() -> ok.
+
+%% Register this node with the backend when it supports registration.
+%% NOTE(review): the true branch returns Backend:post_registration()'s
+%% value, which the spec assumes is ok — confirm backends honour that.
+maybe_register() ->
+ Backend = backend(),
+ case Backend:supports_registration() of
+ true ->
+ register(),
+ Backend:post_registration();
+ false ->
+ rabbit_log:info("Peer discovery backend ~s does not support registration, skipping registration.", [Backend]),
+ ok
+ end.
+
+
+-spec maybe_unregister() -> ok.
+
+%% Unregister this node with the backend when it supports registration.
+maybe_unregister() ->
+ Backend = backend(),
+ case Backend:supports_registration() of
+ true ->
+ unregister();
+ false ->
+ rabbit_log:info("Peer discovery backend ~s does not support registration, skipping unregistration.", [Backend]),
+ ok
+ end.
+
+-spec discovery_retries() -> {Retries :: integer(), Interval :: integer()}.
+
+%% Discovery retry policy: {attempt limit, interval in ms}, overridable
+%% via cluster_formation; see the ?DEFAULT_DISCOVERY_RETRY_* macros.
+discovery_retries() ->
+ case application:get_env(rabbit, cluster_formation) of
+ {ok, Proplist} ->
+ Retries = proplists:get_value(discovery_retry_limit, Proplist, ?DEFAULT_DISCOVERY_RETRY_COUNT),
+ Interval = proplists:get_value(discovery_retry_interval, Proplist, ?DEFAULT_DISCOVERY_RETRY_INTERVAL_MS),
+ {Retries, Interval};
+ undefined ->
+ {?DEFAULT_DISCOVERY_RETRY_COUNT, ?DEFAULT_DISCOVERY_RETRY_INTERVAL_MS}
+ end.
+
+
+-spec maybe_inject_randomized_delay() -> ok.
+%% Sleep a random interval before registration, but only for backends
+%% that actually register (avoids pointless startup delay otherwise).
+maybe_inject_randomized_delay() ->
+ Backend = backend(),
+ case Backend:supports_registration() of
+ true ->
+ rabbit_log:info("Peer discovery backend ~s supports registration.", [Backend]),
+ inject_randomized_delay();
+ false ->
+ rabbit_log:info("Peer discovery backend ~s does not support registration, skipping randomized startup delay.", [Backend]),
+ ok
+ end.
+
+-spec inject_randomized_delay() -> ok.
+
+%% Sleep for a value drawn from the configured delay range (ms), clamped
+%% below by Min. A Max of 0 disables the delay entirely.
+%% NOTE(review): rand:seed(exsplus) re-seeds the PRNG on every call and
+%% 'exsplus' is a legacy algorithm alias — confirm this is intentional.
+inject_randomized_delay() ->
+ {Min, Max} = randomized_delay_range_in_ms(),
+ case {Min, Max} of
+ %% When the max value is set to 0, consider the delay to be disabled.
+ %% In addition, `rand:uniform/1` will fail with a "no function clause"
+ %% when the argument is 0.
+ {_, 0} ->
+ rabbit_log:info("Randomized delay range's upper bound is set to 0. Considering it disabled."),
+ ok;
+ {_, N} when is_number(N) ->
+ rand:seed(exsplus),
+ RandomVal = rand:uniform(round(N)),
+ rabbit_log:debug("Randomized startup delay: configured range is from ~p to ~p milliseconds, PRNG pick: ~p...",
+ [Min, Max, RandomVal]),
+ Effective = case RandomVal < Min of
+ true -> Min;
+ false -> RandomVal
+ end,
+ rabbit_log:info("Will wait for ~p milliseconds before proceeding with registration...", [Effective]),
+ timer:sleep(Effective),
+ ok
+ end.
+
+-spec randomized_delay_range_in_ms() -> {integer(), integer()}.
+
+%% Delay range in milliseconds. Precedence: cluster_formation config,
+%% then the backend's optional randomized_startup_delay_range/0, then
+%% ?DEFAULT_STARTUP_RANDOMIZED_DELAY. Configured values are in seconds
+%% and are converted to ms here.
+randomized_delay_range_in_ms() ->
+ Backend = backend(),
+ Default = case erlang:function_exported(Backend, randomized_startup_delay_range, 0) of
+ true -> Backend:randomized_startup_delay_range();
+ false -> ?DEFAULT_STARTUP_RANDOMIZED_DELAY
+ end,
+ {Min, Max} = case application:get_env(rabbit, cluster_formation) of
+ {ok, Proplist} ->
+ proplists:get_value(randomized_startup_delay_range, Proplist, Default);
+ undefined ->
+ Default
+ end,
+ {Min * 1000, Max * 1000}.
+
+
+-spec register() -> ok.
+
+%% Register with the backend; failures are logged and swallowed so
+%% registration problems do not abort boot.
+register() ->
+ Backend = backend(),
+ rabbit_log:info("Will register with peer discovery backend ~s", [Backend]),
+ case Backend:register() of
+ ok -> ok;
+ {error, Error} ->
+ rabbit_log:error("Failed to register with peer discovery backend ~s: ~p",
+ [Backend, Error]),
+ ok
+ end.
+
+
+-spec unregister() -> ok.
+
+%% Unregister with the backend; failures are logged and swallowed.
+unregister() ->
+ Backend = backend(),
+ rabbit_log:info("Will unregister with peer discovery backend ~s", [Backend]),
+ case Backend:unregister() of
+ ok -> ok;
+ {error, Error} ->
+ rabbit_log:error("Failed to unregister with peer discovery backend ~s: ~p",
+ [Backend, Error]),
+ ok
+ end.
+
+-spec lock() -> {ok, Data :: term()} | not_supported | {error, Reason :: string()}.
+
+%% Acquire the backend's startup lock for this node. Errors are logged
+%% and propagated; any other backend return value is passed through.
+lock() ->
+ Backend = backend(),
+ rabbit_log:info("Will try to lock with peer discovery backend ~s", [Backend]),
+ case Backend:lock(node()) of
+ {error, Reason} = Error ->
+ rabbit_log:error("Failed to lock with peer discovery backend ~s: ~p",
+ [Backend, Reason]),
+ Error;
+ Any ->
+ Any
+ end.
+
+-spec unlock(Data :: term()) -> ok | {error, Reason :: string()}.
+
+%% Release a lock previously obtained via lock/0; Data is whatever the
+%% backend returned from lock. Errors are logged and propagated.
+unlock(Data) ->
+ Backend = backend(),
+ rabbit_log:info("Will try to unlock with peer discovery backend ~s", [Backend]),
+ case Backend:unlock(Data) of
+ {error, Reason} = Error ->
+ rabbit_log:error("Failed to unlock with peer discovery backend ~s: ~p, "
+ "lock data: ~p",
+ [Backend, Reason, Data]),
+ Error;
+ Any ->
+ Any
+ end.
+
+%%
+%% Implementation
+%%
+
+-spec normalize(Nodes :: [node()] |
+ {Nodes :: [node()],
+ NodeType :: rabbit_types:node_type()} |
+ {ok, Nodes :: [node()]} |
+ {ok, {Nodes :: [node()],
+ NodeType :: rabbit_types:node_type()}} |
+ {error, Reason :: string()}) ->
+ {ok, {Nodes :: [node()], NodeType :: rabbit_types:node_type()}} |
+ {error, Reason :: string()}.
+
+%% Normalize the several shapes backends may return from list_nodes/0
+%% into {ok, {Nodes, NodeType}}; a bare node list implies 'disc'.
+normalize(Nodes) when is_list(Nodes) ->
+ {ok, {Nodes, disc}};
+normalize({Nodes, NodeType}) when is_list(Nodes) andalso is_atom(NodeType) ->
+ {ok, {Nodes, NodeType}};
+normalize({ok, Nodes}) when is_list(Nodes) ->
+ {ok, {Nodes, disc}};
+normalize({ok, {Nodes, NodeType}}) when is_list(Nodes) andalso is_atom(NodeType) ->
+ {ok, {Nodes, NodeType}};
+normalize({error, Reason}) ->
+ {error, Reason}.
+
+-spec format_discovered_nodes(Nodes :: list()) -> string().
+
+%% Render a node list as a comma-separated string for log messages.
+format_discovered_nodes(Nodes) ->
+ %% NOTE: in OTP 21 string:join/2 is deprecated but still available.
+ %% Its recommended replacement is not a drop-in one, though, so
+ %% we will not be switching just yet.
+ string:join(lists:map(fun rabbit_data_coercion:to_list/1, Nodes), ", ").
+
+
+
+-spec node_prefix() -> string().
+
+%% The part of this node's name before "@" (e.g. "rabbit" in
+%% rabbit@host); falls back to ?DEFAULT_PREFIX when there is no "@".
+node_prefix() ->
+ case string:tokens(atom_to_list(node()), ?NODENAME_PART_SEPARATOR) of
+ [Prefix, _] -> Prefix;
+ [_] -> ?DEFAULT_PREFIX
+ end.
+
+
+
+-spec append_node_prefix(Value :: binary() | string()) -> string().
+
+%% Build "<prefix>@<hostname>" from a discovered hostname, replacing any
+%% prefix already present in Value with this node's own prefix.
+append_node_prefix(Value) when is_binary(Value) orelse is_list(Value) ->
+ Val = rabbit_data_coercion:to_list(Value),
+ Hostname = case string:tokens(Val, ?NODENAME_PART_SEPARATOR) of
+ [_ExistingPrefix, HN] -> HN;
+ [HN] -> HN
+ end,
+ string:join([node_prefix(), Hostname], ?NODENAME_PART_SEPARATOR).
diff --git a/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl b/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl
new file mode 100644
index 0000000000..8bc7382a75
--- /dev/null
+++ b/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl
@@ -0,0 +1,75 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_peer_discovery_classic_config).
+-behaviour(rabbit_peer_discovery_backend).
+
+-include("rabbit.hrl").
+
+-export([list_nodes/0, supports_registration/0, register/0, unregister/0,
+ post_registration/0, lock/1, unlock/1]).
+
+%%
+%% API
+%%
+
+-spec list_nodes() -> {ok, {Nodes :: [node()], rabbit_types:node_type()}} |
+ {error, Reason :: string()}.
+
+%% Peers come straight from the rabbit.cluster_nodes app env, which may be
+%% either {Nodes, NodeType} or a bare node list (implying 'disc').
+list_nodes() ->
+ case application:get_env(rabbit, cluster_nodes, {[], disc}) of
+ {_Nodes, _NodeType} = Pair -> {ok, Pair};
+ Nodes when is_list(Nodes) -> {ok, {Nodes, disc}}
+ end.
+
+-spec supports_registration() -> boolean().
+
+supports_registration() ->
+ %% If we don't have any nodes configured, skip randomized delay and similar operations
+ %% as we don't want to delay startup for no reason. MK.
+ has_any_peer_nodes_configured().
+
+%% Registration is a no-op for static configuration.
+-spec register() -> ok.
+
+register() ->
+ ok.
+
+-spec unregister() -> ok.
+
+unregister() ->
+ ok.
+
+-spec post_registration() -> ok.
+
+post_registration() ->
+ ok.
+
+%% This backend provides no distributed lock.
+-spec lock(Node :: atom()) -> not_supported.
+
+lock(_Node) ->
+ not_supported.
+
+-spec unlock(Data :: term()) -> ok.
+
+unlock(_Data) ->
+ ok.
+
+%%
+%% Helpers
+%%
+
+%% True when rabbit.cluster_nodes names at least one peer, whichever of
+%% the two accepted shapes (tuple or bare list) is used.
+has_any_peer_nodes_configured() ->
+ case application:get_env(rabbit, cluster_nodes, []) of
+ {[], _NodeType} ->
+ false;
+ {Nodes, _NodeType} when is_list(Nodes) ->
+ true;
+ [] ->
+ false;
+ Nodes when is_list(Nodes) ->
+ true
+ end.
diff --git a/deps/rabbit/src/rabbit_peer_discovery_dns.erl b/deps/rabbit/src/rabbit_peer_discovery_dns.erl
new file mode 100644
index 0000000000..6e343a6e2d
--- /dev/null
+++ b/deps/rabbit/src/rabbit_peer_discovery_dns.erl
@@ -0,0 +1,113 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_peer_discovery_dns).
+-behaviour(rabbit_peer_discovery_backend).
+
+-include("rabbit.hrl").
+
+-export([list_nodes/0, supports_registration/0, register/0, unregister/0,
+ post_registration/0, lock/1, unlock/1]).
+%% for tests
+-export([discover_nodes/2, discover_hostnames/2]).
+
+%%
+%% API
+%%
+
+-spec list_nodes() ->
+ {ok, {Nodes :: [node()], rabbit_types:node_type()}}.
+
+%% Discover peers by resolving the configured seed hostname
+%% (cluster_formation.peer_discovery_dns.hostname) via A/AAAA lookups
+%% plus reverse DNS; returns {ok, {[], disc}} when unconfigured.
+list_nodes() ->
+ case application:get_env(rabbit, cluster_formation) of
+ undefined ->
+ {ok, {[], disc}};
+ {ok, ClusterFormation} ->
+ case proplists:get_value(peer_discovery_dns, ClusterFormation) of
+ undefined ->
+ rabbit_log:warning("Peer discovery backend is set to ~s "
+ "but final config does not contain rabbit.cluster_formation.peer_discovery_dns. "
+ "Cannot discover any nodes because seed hostname is not configured!",
+ [?MODULE]),
+ {ok, {[], disc}};
+ Proplist ->
+ Hostname = rabbit_data_coercion:to_list(proplists:get_value(hostname, Proplist)),
+
+ {ok, {discover_nodes(Hostname, net_kernel:longnames()), rabbit_peer_discovery:node_type()}}
+ end
+ end.
+
+
+%% DNS discovery has no registration concept.
+-spec supports_registration() -> boolean().
+
+supports_registration() ->
+ false.
+
+
+-spec register() -> ok.
+
+register() ->
+ ok.
+
+-spec unregister() -> ok.
+
+unregister() ->
+ ok.
+
+-spec post_registration() -> ok.
+
+post_registration() ->
+ ok.
+
+%% No distributed lock is provided by this backend.
+-spec lock(Node :: atom()) -> not_supported.
+
+lock(_Node) ->
+ not_supported.
+
+-spec unlock(Data :: term()) -> ok.
+
+unlock(_Data) ->
+ ok.
+
+%%
+%% Implementation
+%%
+
+%% Turn resolved hostnames into node name atoms.
+%% NOTE(review): list_to_atom/1 here creates atoms from DNS-derived data;
+%% atoms are never garbage collected — confirm the set of hostnames is
+%% bounded in the target deployments.
+discover_nodes(SeedHostname, LongNamesUsed) ->
+ [list_to_atom(rabbit_peer_discovery:append_node_prefix(H)) ||
+ H <- discover_hostnames(SeedHostname, LongNamesUsed)].
+
+%% Hostnames from both IPv4 (A) and IPv6 (AAAA) records, concatenated.
+discover_hostnames(SeedHostname, LongNamesUsed) ->
+ lookup(SeedHostname, LongNamesUsed, ipv4) ++
+ lookup(SeedHostname, LongNamesUsed, ipv6).
+
+%% DNS record type for each address family.
+decode_record(ipv4) ->
+ a;
+decode_record(ipv6) ->
+ aaaa.
+
+%% Resolve the seed hostname to addresses, then reverse-resolve each
+%% address to a hostname; addresses whose reverse lookup fails are dropped.
+lookup(SeedHostname, LongNamesUsed, IPv) ->
+ IPs = inet_res:lookup(SeedHostname, in, decode_record(IPv)),
+ rabbit_log:info("Addresses discovered via ~s records of ~s: ~s",
+ [string:to_upper(atom_to_list(decode_record(IPv))),
+ SeedHostname,
+ string:join([inet_parse:ntoa(IP) || IP <- IPs], ", ")]),
+ Hosts = [extract_host(inet:gethostbyaddr(A), LongNamesUsed, A) ||
+ A <- IPs],
+ lists:filter(fun(E) -> E =/= error end, Hosts).
+
+
+%% long node names are used
+extract_host({ok, {hostent, FQDN, _, _, _, _}}, true, _Address) ->
+ FQDN;
+%% short node names are used
+extract_host({ok, {hostent, FQDN, _, _, _, _}}, false, _Address) ->
+ lists:nth(1, string:tokens(FQDN, "."));
+extract_host({error, Error}, _, Address) ->
+ rabbit_log:error("Reverse DNS lookup for address ~s failed: ~p",
+ [inet_parse:ntoa(Address), Error]),
+ error.
diff --git a/deps/rabbit/src/rabbit_plugins.erl b/deps/rabbit/src/rabbit_plugins.erl
new file mode 100644
index 0000000000..5697ffc29a
--- /dev/null
+++ b/deps/rabbit/src/rabbit_plugins.erl
@@ -0,0 +1,699 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2011-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_plugins).
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include_lib("stdlib/include/zip.hrl").
+
+-export([setup/0, active/0, read_enabled/1, list/1, list/2, dependencies/3, running_plugins/0]).
+-export([ensure/1]).
+-export([validate_plugins/1, format_invalid_plugins/1]).
+-export([is_strictly_plugin/1, strictly_plugins/2, strictly_plugins/1]).
+-export([plugins_dir/0, plugin_names/1, plugins_expand_dir/0, enabled_plugins_file/0]).
+
+% Export for testing purpose.
+-export([is_version_supported/2, validate_plugins/2]).
+%%----------------------------------------------------------------------------
+
+-type plugin_name() :: atom().
+
+%%----------------------------------------------------------------------------
+
+-spec ensure(string()) -> {'ok', [atom()], [atom()]} | {error, any()}.
+
+%% React to a change of the enabled-plugins file; only meaningful while
+%% the broker is running (ensure1/1 does the actual work).
+ensure(FileJustChanged) ->
+ case rabbit:is_running() of
+ true -> ensure1(FileJustChanged);
+ false -> {error, rabbit_not_running}
+ end.
+
+%% Recompute the wanted plugin set from the enabled-plugins file, start
+%% newly enabled plugins, stop newly disabled ones, and emit a
+%% plugins_changed event. Only acts when the changed file IS our
+%% configured enabled_plugins_file (paths compared in native form).
+%% Returns {ok, Started, Stopped}.
+ensure1(FileJustChanged0) ->
+ {ok, OurFile0} = application:get_env(rabbit, enabled_plugins_file),
+ FileJustChanged = filename:nativename(FileJustChanged0),
+ OurFile = filename:nativename(OurFile0),
+ case OurFile of
+ FileJustChanged ->
+ Enabled = read_enabled(OurFile),
+ Wanted = prepare_plugins(Enabled),
+ Current = active(),
+ Start = Wanted -- Current,
+ Stop = Current -- Wanted,
+ rabbit:start_apps(Start),
+ %% We need sync_notify here since mgmt will attempt to look at all
+ %% the modules for the disabled plugins - if they are unloaded
+ %% that won't work.
+ ok = rabbit_event:sync_notify(plugins_changed, [{enabled, Start},
+ {disabled, Stop}]),
+ %% The app_utils module stops the apps in reverse order, so we should
+ %% pass them here in dependency order.
+ rabbit:stop_apps(lists:reverse(Stop)),
+ clean_plugins(Stop),
+ case {Start, Stop} of
+ {[], []} ->
+ ok;
+ {[], _} ->
+ rabbit_log:info("Plugins changed; disabled ~p~n",
+ [Stop]);
+ {_, []} ->
+ rabbit_log:info("Plugins changed; enabled ~p~n",
+ [Start]);
+ {_, _} ->
+ rabbit_log:info("Plugins changed; enabled ~p, disabled ~p~n",
+ [Start, Stop])
+ end,
+ {ok, Start, Stop};
+ _ ->
+ {error, {enabled_plugins_mismatch, FileJustChanged, OurFile}}
+ end.
+
+-spec plugins_expand_dir() -> file:filename().
+%% Directory plugins are unpacked into; defaults to a path under the
+%% mnesia dir when the app env is unset.
+plugins_expand_dir() ->
+ case application:get_env(rabbit, plugins_expand_dir) of
+ {ok, ExpandDir} ->
+ ExpandDir;
+ _ ->
+ filename:join([rabbit_mnesia:dir(), "plugins_expand_dir"])
+ end.
+
+-spec plugins_dir() -> file:filename().
+%% Directory the plugin archives are distributed in; falls back to a stub
+%% path under the mnesia dir when unset.
+plugins_dir() ->
+ case application:get_env(rabbit, plugins_dir) of
+ {ok, PluginsDistDir} ->
+ PluginsDistDir;
+ _ ->
+ filename:join([rabbit_mnesia:dir(), "plugins_dir_stub"])
+ end.
+
+-spec enabled_plugins_file() -> file:filename().
+%% Path of the enabled-plugins term file; defaults under the mnesia dir.
+enabled_plugins_file() ->
+ case application:get_env(rabbit, enabled_plugins_file) of
+ {ok, Val} ->
+ Val;
+ _ ->
+ filename:join([rabbit_mnesia:dir(), "enabled_plugins"])
+ end.
+
+-spec enabled_plugins() -> [atom()].
+%% Names read from the enabled-plugins file, or [] when no file is
+%% configured (note: no mnesia-dir fallback here, unlike the functions above).
+enabled_plugins() ->
+ case application:get_env(rabbit, enabled_plugins_file) of
+ {ok, EnabledFile} ->
+ read_enabled(EnabledFile);
+ _ ->
+ []
+ end.
+
+%% @doc Prepares the file system and installs all enabled plugins.
+
+-spec setup() -> [plugin_name()].
+
+%% Wipes the expand dir, then unpacks/validates everything listed in the
+%% enabled-plugins file; returns the wanted plugin names.
+setup() ->
+ ExpandDir = plugins_expand_dir(),
+ %% Eliminate the contents of the destination directory
+ case delete_recursively(ExpandDir) of
+ ok -> ok;
+ {error, E1} -> throw({error, {cannot_delete_plugins_expand_dir,
+ [ExpandDir, E1]}})
+ end,
+ Enabled = enabled_plugins(),
+ prepare_plugins(Enabled).
+
+%% @doc Lists the plugins which are currently running.
+
+-spec active() -> [plugin_name()].
+
+%% Intersection of installed plugin names and currently running applications.
+active() ->
+ InstalledPlugins = plugin_names(list(plugins_dir())),
+ [App || {App, _, _} <- rabbit_misc:which_applications(),
+ lists:member(App, InstalledPlugins)].
+
+%% @doc Get the list of plugins which are ready to be enabled.
+
+-spec list(string()) -> [#plugin{}].
+
+%% Same as list/2 with required dependencies excluded.
+list(PluginsPath) ->
+ list(PluginsPath, false).
+
+-spec list(string(), boolean()) -> [#plugin{}].
+
+%% Discover plugin records on the (colon/semicolon-separated) path,
+%% de-duplicate, optionally drop required deps, report problems, and
+%% verify inter-plugin dependencies.
+list(PluginsPath, IncludeRequiredDeps) ->
+ {AllPlugins, LoadingProblems} = discover_plugins(split_path(PluginsPath)),
+ {UniquePlugins, DuplicateProblems} = remove_duplicate_plugins(AllPlugins),
+ Plugins1 = maybe_keep_required_deps(IncludeRequiredDeps, UniquePlugins),
+ Plugins2 = remove_plugins(Plugins1),
+ maybe_report_plugin_loading_problems(LoadingProblems ++ DuplicateProblems),
+ ensure_dependencies(Plugins2).
+
+%% @doc Read the list of enabled plugins from the supplied term file.
+
+-spec read_enabled(file:filename()) -> [plugin_name()].
+
+%% A missing file means no plugins; exactly one term (a list) is expected,
+%% anything else is treated as a malformed file.
+read_enabled(PluginsFile) ->
+ case rabbit_file:read_term_file(PluginsFile) of
+ {ok, [Plugins]} -> Plugins;
+ {ok, []} -> [];
+ {ok, [_|_]} -> throw({error, {malformed_enabled_plugins_file,
+ PluginsFile}});
+ {error, enoent} -> [];
+ {error, Reason} -> throw({error, {cannot_read_enabled_plugins_file,
+ PluginsFile, Reason}})
+ end.
+
+%% @doc Calculate the dependency graph from <i>Sources</i>.
+%% When Reverse =:= true the bottom/leaf level applications are returned in
+%% the resulting list, otherwise they're skipped.
+
+-spec dependencies(boolean(), [plugin_name()], [#plugin{}]) ->
+ [plugin_name()].
+
+%% Builds an acyclic app-dependency digraph from AllPlugins, then returns
+%% the apps reachable from (or, when Reverse, reaching) Sources in
+%% postorder; the graph is deleted before returning.
+dependencies(Reverse, Sources, AllPlugins) ->
+ {ok, G} = rabbit_misc:build_acyclic_graph(
+ fun ({App, _Deps}) -> [{App, App}] end,
+ fun ({App, Deps}) -> [{App, Dep} || Dep <- Deps] end,
+ [{Name, Deps} || #plugin{name = Name,
+ dependencies = Deps} <- AllPlugins]),
+ Dests = case Reverse of
+ false -> digraph_utils:reachable(Sources, G);
+ true -> digraph_utils:reaching(Sources, G)
+ end,
+ OrderedDests = digraph_utils:postorder(digraph_utils:subgraph(G, Dests)),
+ true = digraph:delete(G),
+ OrderedDests.
+
+%% Filter real plugins from application dependencies
+
+-spec is_strictly_plugin(#plugin{}) -> boolean().
+
+%% A "real" plugin depends on the rabbit app (recorded in
+%% extra_dependencies by ensure_dependencies/1).
+is_strictly_plugin(#plugin{extra_dependencies = ExtraDeps}) ->
+ lists:member(rabbit, ExtraDeps).
+
+-spec strictly_plugins([plugin_name()], [#plugin{}]) -> [plugin_name()].
+
+%% Keep only the names in Plugins whose #plugin{} record (looked up in
+%% AllPlugins) is a real plugin per is_strictly_plugin/1.
+strictly_plugins(Plugins, AllPlugins) ->
+ lists:filter(
+ fun(Name) ->
+ is_strictly_plugin(lists:keyfind(Name, #plugin.name, AllPlugins))
+ end, Plugins).
+
+-spec strictly_plugins([plugin_name()]) -> [plugin_name()].
+
+%% As strictly_plugins/2, discovering the full plugin list from the
+%% configured plugins directory. Delegates to strictly_plugins/2 rather
+%% than duplicating the filter logic (the two bodies were identical).
+strictly_plugins(Plugins) ->
+    strictly_plugins(Plugins, list(plugins_dir())).
+
+%% For a few known cases, an externally provided plugin can be trusted.
+%% In this special case, it overrides the plugin.
+is_plugin_provided_by_otp(#plugin{name = eldap}) ->
+ %% eldap was added to Erlang/OTP R15B01 (ERTS 5.9.1). In this case,
+ %% we prefer this version to the plugin.
+ rabbit_misc:version_compare(erlang:system_info(version), "5.9.1", gte);
+is_plugin_provided_by_otp(_) ->
+ false.
+
+%% Make sure we don't list OTP apps in here, and also that we detect
+%% missing dependencies.
+%% Dependencies not found among the plugins are partitioned into loadable
+%% OTP apps vs truly missing ones; missing deps abort with the set of
+%% plugins to blame. OTP deps are moved out of 'dependencies' into
+%% 'extra_dependencies' on each returned #plugin{}.
+ensure_dependencies(Plugins) ->
+ Names = plugin_names(Plugins),
+ NotThere = [Dep || #plugin{dependencies = Deps} <- Plugins,
+ Dep <- Deps,
+ not lists:member(Dep, Names)],
+ {OTP, Missing} = lists:partition(fun is_loadable/1, lists:usort(NotThere)),
+ case Missing of
+ [] -> ok;
+ _ -> Blame = [Name || #plugin{name = Name,
+ dependencies = Deps} <- Plugins,
+ lists:any(fun (Dep) ->
+ lists:member(Dep, Missing)
+ end, Deps)],
+ throw({error, {missing_dependencies, Missing, Blame}})
+ end,
+ [P#plugin{dependencies = Deps -- OTP,
+ extra_dependencies = Deps -- (Deps -- OTP)}
+ || P = #plugin{dependencies = Deps} <- Plugins].
+
+%% True when App can be loaded by the code server (i.e. it is available
+%% in the release, e.g. an OTP application); loading is undone on success.
+is_loadable(App) ->
+ case application:load(App) of
+ {error, {already_loaded, _}} -> true;
+ ok -> application:unload(App),
+ true;
+ _ -> false
+ end.
+
+
+%% List running plugins along with their version.
+-spec running_plugins() -> {ok, [{atom(), Vsn :: string()}]}.
+running_plugins() ->
+ ActivePlugins = active(),
+ {ok, [{App, Vsn} || {App, _ , Vsn} <- rabbit_misc:which_applications(), lists:member(App, ActivePlugins)]}.
+
+%%----------------------------------------------------------------------------
+
+%% Expand the enabled set with its dependencies, validate version
+%% constraints (problems are logged, not fatal), and unpack each valid
+%% plugin into the expand dir. Returns the full wanted name list —
+%% including plugins that failed validation and were not prepared.
+prepare_plugins(Enabled) ->
+ ExpandDir = plugins_expand_dir(),
+ AllPlugins = list(plugins_dir()),
+ Wanted = dependencies(false, Enabled, AllPlugins),
+ WantedPlugins = lookup_plugins(Wanted, AllPlugins),
+ {ValidPlugins, Problems} = validate_plugins(WantedPlugins),
+ maybe_warn_about_invalid_plugins(Problems),
+ case filelib:ensure_dir(ExpandDir ++ "/") of
+ ok -> ok;
+ {error, E2} -> throw({error, {cannot_create_plugins_expand_dir,
+ [ExpandDir, E2]}})
+ end,
+ [prepare_plugin(Plugin, ExpandDir) || Plugin <- ValidPlugins],
+ Wanted.
+
+%% Log one aggregated warning for any plugins that failed validation.
+maybe_warn_about_invalid_plugins([]) ->
+ ok;
+maybe_warn_about_invalid_plugins(InvalidPlugins) ->
+ %% TODO: error message formatting
+ rabbit_log:warning(format_invalid_plugins(InvalidPlugins)).
+
+
+%% Render the {Name, Errors} pairs from validate_plugins/2 as one flat
+%% human-readable string.
+format_invalid_plugins(InvalidPlugins) ->
+ lists:flatten(["Failed to enable some plugins: \r\n"
+ | [format_invalid_plugin(Plugin)
+ || Plugin <- InvalidPlugins]]).
+
+%% One plugin's name followed by its formatted error lines.
+format_invalid_plugin({Name, Errors}) ->
+ [io_lib:format(" ~p:~n", [Name])
+ | [format_invalid_plugin_error(Err) || Err <- Errors]].
+
+%% Human-readable rendering of a single validation error term.
+format_invalid_plugin_error({missing_dependency, Dep}) ->
+ io_lib:format(" Dependency is missing or invalid: ~p~n", [Dep]);
+%% a plugin doesn't support the effective broker version
+format_invalid_plugin_error({broker_version_mismatch, Version, Required}) ->
+ io_lib:format(" Plugin doesn't support current server version."
+ " Actual broker version: ~p, supported by the plugin: ~p~n",
+ [Version, format_required_versions(Required)]);
+%% one of dependencies of a plugin doesn't match its version requirements
+format_invalid_plugin_error({{dependency_version_mismatch, Version, Required}, Name}) ->
+ io_lib:format(" Version '~p' of dependency '~p' is unsupported."
+ " Version ranges supported by the plugin: ~p~n",
+ [Version, Name, Required]);
+format_invalid_plugin_error(Err) ->
+ io_lib:format(" Unknown error ~p~n", [Err]).
+
%% Annotate each required version "X.Y.Z" with the "X.Y.x" series it
%% implies, e.g. "3.8.1" becomes "3.8.1-3.8.x". Values that do not
%% look like a dotted version are returned unchanged.
format_required_versions(Versions) ->
    lists:map(fun(V) ->
                  %% The dots must be escaped ("\\."): in an Erlang
                  %% string "\." is just ".", which as a regex matches
                  %% ANY character, so e.g. "3x8y1" would wrongly match.
                  case re:run(V, "^[0-9]*\\.[0-9]*\\.", [{capture, all, list}]) of
                      {match, [Sub]} ->
                          lists:flatten(io_lib:format("~s-~sx", [V, Sub]));
                      _ ->
                          V
                  end
              end, Versions).
+
%% Validate plugins against the version of the `rabbit' application.
%% Falls back to "0.0.0" (a development build, which accepts
%% everything) when the version cannot be determined.
validate_plugins(Plugins) ->
    %% Load the app so its `vsn' key is readable; the result is
    %% ignored because it may already be loaded.
    application:load(rabbit),
    %% Fix: the original bound RabbitVersion twice in a row
    %% ("RabbitVersion = RabbitVersion = case ..."), a harmless but
    %% confusing duplicated match.
    RabbitVersion = case application:get_key(rabbit, vsn) of
                        undefined -> "0.0.0";
                        {ok, Val} -> Val
                    end,
    validate_plugins(Plugins, RabbitVersion).
+
%% Check every plugin against the broker version and against the
%% versions of the plugins already accepted in this pass. Returns
%% {ValidPlugins, Errors} where Errors is a list of
%% {PluginName, [ErrorTerm]} pairs.
validate_plugins(Plugins, BrokerVersion) ->
    lists:foldl(
        fun(#plugin{name = Name,
                    broker_version_requirements = BrokerVersionReqs,
                    dependency_version_requirements = DepsVersions} = Plugin,
            {Plugins0, Errors}) ->
            case is_version_supported(BrokerVersion, BrokerVersionReqs) of
                true ->
                    %% "0.0.0" marks a development build: version
                    %% requirements are ignored, with a warning.
                    case BrokerVersion of
                        "0.0.0" ->
                            rabbit_log:warning(
                                "Running development version of the broker."
                                " Requirement ~p for plugin ~p is ignored.",
                                [BrokerVersionReqs, Name]);
                        _ -> ok
                    end,
                    %% Dependencies are checked only against plugins
                    %% validated earlier in the fold (Plugins0).
                    case check_plugins_versions(Name, Plugins0, DepsVersions) of
                        ok -> {[Plugin | Plugins0], Errors};
                        {error, Err} -> {Plugins0, [{Name, Err} | Errors]}
                    end;
                false ->
                    Error = [{broker_version_mismatch, BrokerVersion, BrokerVersionReqs}],
                    {Plugins0, [{Name, Error} | Errors]}
            end
        end,
        {[],[]},
        Plugins).
+
%% Verify that each dependency required by PluginName exists in
%% AllPlugins and that its version satisfies the given requirement.
%% Returns ok, or {error, [Problem]} listing every failure.
check_plugins_versions(PluginName, AllPlugins, RequiredVersions) ->
    ExistingVersions = [{Name, Vsn}
                        || #plugin{name = Name, version = Vsn} <- AllPlugins],
    Problems = lists:foldl(
        fun({Name, Versions}, Acc) ->
            case proplists:get_value(Name, ExistingVersions) of
                undefined -> [{missing_dependency, Name} | Acc];
                Version ->
                    case is_version_supported(Version, Versions) of
                        true ->
                            %% An empty version means the dependency
                            %% does not declare one; the requirement
                            %% is ignored with a warning.
                            case Version of
                                "" ->
                                    %% Fix: the format string has three ~p
                                    %% placeholders but only two arguments
                                    %% were supplied, which would crash the
                                    %% formatter on this path.
                                    rabbit_log:warning(
                                        "~p plugin version is not defined."
                                        " Requirement ~p for plugin ~p is ignored",
                                        [Name, Versions, PluginName]);
                                _ -> ok
                            end,
                            Acc;
                        false ->
                            [{{dependency_version_mismatch, Version, Versions}, Name} | Acc]
                    end
            end
        end,
        [],
        RequiredVersions),
    case Problems of
        [] -> ok;
        _ -> {error, Problems}
    end.
+
+is_version_supported("", _) -> true;
+is_version_supported("0.0.0", _) -> true;
+is_version_supported(_Version, []) -> true;
+is_version_supported(VersionFull, ExpectedVersions) ->
+ %% Pre-release version should be supported in plugins,
+ %% therefore preview part should be removed
+ Version = remove_version_preview_part(VersionFull),
+ case lists:any(fun(ExpectedVersion) ->
+ rabbit_misc:strict_version_minor_equivalent(ExpectedVersion,
+ Version)
+ andalso
+ rabbit_misc:version_compare(ExpectedVersion, Version, lte)
+ end,
+ ExpectedVersions) of
+ true -> true;
+ false -> false
+ end.
+
%% Strip the pre-release ("preview") part from a semver version, so
%% that e.g. a beta of X.Y.Z compares as plain X.Y.Z.
%% NOTE(review): returns a binary while callers mostly carry version
%% strings — presumably rabbit_misc's comparisons accept both; confirm.
remove_version_preview_part(Version) ->
    {Ver, _Preview} = rabbit_semver:parse(Version),
    iolist_to_binary(rabbit_semver:format({Ver, {[], []}})).
+
%% Unload and purge each given plugin, removing its unpacked files
%% from the plugins expand directory.
clean_plugins(Plugins) ->
    ExpandDir = plugins_expand_dir(),
    lists:map(fun(Plugin) -> clean_plugin(Plugin, ExpandDir) end, Plugins).
+
%% Unload one plugin: purge and delete its modules from the code
%% server, then remove its unpacked directory. Order matters: modules
%% are purged before their files are deleted.
clean_plugin(Plugin, ExpandDir) ->
    {ok, Mods} = application:get_key(Plugin, modules),
    application:unload(Plugin),
    [begin
         code:soft_purge(Mod),
         code:delete(Mod),
         %% assert the module really is gone
         false = code:is_loaded(Mod)
     end || Mod <- Mods],
    delete_recursively(rabbit_misc:format("~s/~s", [ExpandDir, Plugin])).

%% Add a plugin's ebin directory to the front of the code path and
%% sanity-check that one of its modules can actually be loaded.
%% Throws when the beam files are unloadable (e.g. built with an
%% incompatible Erlang release).
prepare_dir_plugin(PluginAppDescPath) ->
    PluginEbinDir = filename:dirname(PluginAppDescPath),
    Plugin = filename:basename(PluginAppDescPath, ".app"),
    code:add_patha(PluginEbinDir),
    case filelib:wildcard(PluginEbinDir++ "/*.beam") of
        [] ->
            ok;
        [BeamPath | _] ->
            %% Loading a single module is enough to detect an
            %% incompatible beam format.
            Module = list_to_atom(filename:basename(BeamPath, ".beam")),
            case code:ensure_loaded(Module) of
                {module, _} ->
                    ok;
                {error, badfile} ->
                    rabbit_log:error("Failed to enable plugin \"~s\": "
                                     "it may have been built with an "
                                     "incompatible (more recent?) "
                                     "version of Erlang~n", [Plugin]),
                    throw({plugin_built_with_incompatible_erlang, Plugin});
                Error ->
                    throw({plugin_module_unloadable, Plugin, Error})
            end
    end.
+
+%%----------------------------------------------------------------------------
+
%% Recursively delete a file or directory tree. Returns ok, or
%% {error, {cannot_delete, Path, Reason}} naming the entry that failed.
delete_recursively(Fn) ->
    Result = rabbit_file:recursive_delete([Fn]),
    case Result of
        ok -> ok;
        {error, {Path, Reason}} -> {error, {cannot_delete, Path, Reason}}
    end.
+
%% Pick out the application descriptor files among the paths unpacked
%% below ExpandDir, i.e. paths of the shape ExpandDir/<app>/ebin/<f>.app.
find_unzipped_app_file(ExpandDir, Files) ->
    Depth = length(filename:split(ExpandDir)),
    lists:filter(
      fun(Path) ->
              case lists:nthtail(Depth, filename:split(Path)) of
                  [_AppName, "ebin", MaybeAppFile] ->
                      lists:suffix(".app", MaybeAppFile);
                  _ ->
                      false
              end
      end, Files).
+
%% Unpack an .ez archive into ExpandDir and activate it, or activate a
%% directory plugin in place. Throws when no .app file can be found or
%% the archive cannot be unpacked.
prepare_plugin(#plugin{type = ez, name = Name, location = Location}, ExpandDir) ->
    case zip:unzip(Location, [{cwd, ExpandDir}]) of
        {ok, Files} ->
            case find_unzipped_app_file(ExpandDir, Files) of
                [PluginAppDescPath|_] ->
                    prepare_dir_plugin(PluginAppDescPath);
                _ ->
                    rabbit_log:error("Plugin archive '~s' doesn't contain an .app file~n", [Location]),
                    throw({app_file_missing, Name, Location})
            end;
        {error, Reason} ->
            rabbit_log:error("Could not unzip plugin archive '~s': ~p~n", [Location, Reason]),
            throw({failed_to_unzip_plugin, Name, Location, Reason})
    end;
prepare_plugin(#plugin{type = dir, location = Location, name = Name},
               _ExpandDir) ->
    case filelib:wildcard(Location ++ "/ebin/*.app") of
        [PluginAppDescPath|_] ->
            prepare_dir_plugin(PluginAppDescPath);
        _ ->
            rabbit_log:error("Plugin directory '~s' doesn't contain an .app file~n", [Location]),
            throw({app_file_missing, Name, Location})
    end.

%% Read a plugin's metadata from an .ez archive or a free .app file.
%% Returns a #plugin{} record, or {error, Location, Reason}.
plugin_info({ez, EZ}) ->
    case read_app_file(EZ) of
        {application, Name, Props} -> mkplugin(Name, Props, ez, EZ);
        {error, Reason} -> {error, EZ, Reason}
    end;
plugin_info({app, App}) ->
    case rabbit_file:read_term_file(App) of
        {ok, [{application, Name, Props}]} ->
            %% the plugin root is two levels above <root>/ebin/<name>.app
            mkplugin(Name, Props, dir,
                     filename:absname(
                       filename:dirname(filename:dirname(App))));
        {error, Reason} ->
            {error, App, {invalid_app, Reason}}
    end.
+
%% Build a #plugin{} record from an application descriptor's
%% properties; missing properties fall back to defaults.
mkplugin(Name, Props, Type, Location) ->
    Prop = fun(Key, Default) -> proplists:get_value(Key, Props, Default) end,
    #plugin{name                            = Name,
            version                         = Prop(vsn, "0"),
            description                     = Prop(description, ""),
            dependencies                    = Prop(applications, []),
            location                        = Location,
            type                            = Type,
            broker_version_requirements     = Prop(broker_version_requirements, []),
            dependency_version_requirements = Prop(dependency_version_requirements, [])}.
+
%% Extract and parse the application descriptor (.app) from an .ez
%% archive, reading it into memory without unpacking to disk.
read_app_file(EZ) ->
    case zip:list_dir(EZ) of
        %% the first list_dir/1 entry is the archive comment record,
        %% not a file, hence the [_|ZippedFiles] match
        {ok, [_|ZippedFiles]} ->
            case find_app_files(ZippedFiles) of
                [AppPath|_] ->
                    {ok, [{AppPath, AppFile}]} =
                        zip:extract(EZ, [{file_list, [AppPath]}, memory]),
                    parse_binary(AppFile);
                [] ->
                    {error, no_app_file}
            end;
        {error, Reason} ->
            {error, {invalid_ez, Reason}}
    end.
+
%% Return the paths, among a zip directory listing, of application
%% descriptor (.app) files located under an ebin/ directory.
find_app_files(ZippedFiles) ->
    %% "\\." matches a literal dot; the previous unescaped "." also
    %% accepted names such as "foo_app".
    {ok, RE} = re:compile("^.*/ebin/.*\\.app$"),
    [Path || {zip_file, Path, _, _, _, _} <- ZippedFiles,
             re:run(Path, RE, [{capture, none}]) =:= match].
+
%% Parse an .app file's contents (a binary) into an Erlang term.
%% Returns {error, {invalid_app, Reason}} on malformed input.
parse_binary(Bin) ->
    try
        {ok, Ts, _} = erl_scan:string(binary_to_list(Bin)),
        {ok, Term} = erl_parse:parse_term(Ts),
        Term
    catch
        %% Catch errors as well as throws: a bare `catch Err ->'
        %% pattern only traps throws, so the badmatch raised when
        %% scanning/parsing fails would previously escape uncaught.
        _:Err -> {error, {invalid_app, Err}}
    end.
+
%% Extract the names of the given #plugin{} records.
plugin_names(Plugins) ->
    [Name || #plugin{name = Name} <- Plugins].

%% Look up plugin records by name.
%% NOTE(review): lists:keyfind/3 yields `false' for a name with no
%% matching record, so unknown names produce `false' entries in the
%% result — confirm callers only pass known names.
lookup_plugins(Names, AllPlugins) ->
    %% Preserve order of Names
    lists:map(
        fun(Name) ->
            lists:keyfind(Name, #plugin.name, AllPlugins)
        end,
        Names).
+
%% Split a PATH-like value into its components, using the platform's
%% conventional separator (":" on Unix, ";" on Windows).
split_path(PathString) ->
    Separator = case os:type() of
                    {unix, _}  -> ":";
                    {win32, _} -> ";"
                end,
    string:tokens(PathString, Separator).
+
%% Search for files matching Glob inside Dir; returns their full paths.
full_path_wildcard(Glob, Dir) ->
    lists:map(fun(File) -> filename:join([Dir, File]) end,
              filelib:wildcard(Glob, Dir)).

%% Returns {ez, Path} for every .ez archive found in the given
%% directories, in directory order.
list_ezs(Dirs) ->
    [{ez, EZ} || Dir <- Dirs, EZ <- full_path_wildcard("*.ez", Dir)].

%% Returns {app, Path} for everything that looks like an unpacked OTP
%% application (a */ebin/*.app file) in the given directories.
list_free_apps(Dirs) ->
    [{app, App} || Dir <- Dirs, App <- full_path_wildcard("*/ebin/*.app", Dir)].
+
%% Ordering function: plugins sort by name; records with the same name
%% sort by ascending version (per rabbit_semver comparison).
compare_by_name_and_version(#plugin{name = Name, version = VersionA},
                            #plugin{name = Name, version = VersionB}) ->
    rabbit_semver:lte(VersionA, VersionB);
compare_by_name_and_version(#plugin{name = NameA},
                            #plugin{name = NameB}) ->
    NameA =< NameB.

%% Scan the given directories for plugin archives (.ez) and free
%% (unpacked) applications, reading each one's metadata.
-spec discover_plugins([Directory]) -> {[#plugin{}], [Problem]} when
      Directory :: file:name(),
      Problem :: {file:name(), term()}.
discover_plugins(PluginsDirs) ->
    EZs = list_ezs(PluginsDirs),
    FreeApps = list_free_apps(PluginsDirs),
    read_plugins_info(EZs ++ FreeApps, {[], []}).
+
%% Read plugin metadata for each path, partitioning the results into
%% plugin records and {Location, Reason} problems. Both result lists
%% come out in reverse path order.
read_plugins_info(Paths, Acc0) ->
    lists:foldl(
      fun(Path, {Plugins, Problems}) ->
              case plugin_info(Path) of
                  #plugin{} = Plugin ->
                      {[Plugin | Plugins], Problems};
                  {error, Location, Reason} ->
                      {Plugins, [{Location, Reason} | Problems]}
              end
      end, Acc0, Paths).
+
%% Drop duplicate plugins, keeping only the most recent version of
%% each; every dropped duplicate is reported as a
%% {Location, duplicate_plugin} problem.
remove_duplicate_plugins(Plugins) ->
    %% Reverse order ensures that if there are several versions of the
    %% same plugin, the most recent one comes first.
    Sorted = lists:reverse(
               lists:sort(fun compare_by_name_and_version/2, Plugins)),
    remove_duplicate_plugins(Sorted, {[], []}).

%% Walk the sorted list: the first record seen for a name wins; later
%% records with the same name become problems.
remove_duplicate_plugins([], Acc) ->
    Acc;
remove_duplicate_plugins([Best = #plugin{name = Name}, Offender = #plugin{name = Name} | Rest],
                         {Plugins0, Problems0}) ->
    Problems1 = [{Offender#plugin.location, duplicate_plugin}|Problems0],
    remove_duplicate_plugins([Best|Rest], {Plugins0, Problems1});
remove_duplicate_plugins([Plugin|Rest], {Plugins0, Problems0}) ->
    Plugins1 = [Plugin|Plugins0],
    remove_duplicate_plugins(Rest, {Plugins1, Problems0}).

%% When the first argument is false, filter out the applications that
%% `rabbit' itself depends on (they are not plugins). Accepts either
%% #plugin{} records or bare application names.
maybe_keep_required_deps(true, Plugins) ->
    Plugins;
maybe_keep_required_deps(false, Plugins) ->
    RabbitDeps = list_all_deps([rabbit]),
    lists:filter(fun
                     (#plugin{name = Name}) ->
                         not lists:member(Name, RabbitDeps);
                     (Name) when is_atom(Name) ->
                         not lists:member(Name, RabbitDeps)
                 end,
                 Plugins).

%% Compute the transitive dependency closure of the given applications
%% (the applications themselves included).
list_all_deps(Applications) ->
    list_all_deps(Applications, []).

list_all_deps([Application | Applications], Deps) ->
    %% We load the application to be sure we can get the "applications" key.
    %% This is required for rabbitmq-plugins for instance.
    application:load(Application),
    NewDeps = [Application | Deps],
    case application:get_key(Application, applications) of
        {ok, ApplicationDeps} ->
            %% subtract what is already collected to guarantee termination
            RemainingApplications0 = ApplicationDeps ++ Applications,
            RemainingApplications = RemainingApplications0 -- NewDeps,
            list_all_deps(RemainingApplications, NewDeps);
        undefined ->
            list_all_deps(Applications, NewDeps)
    end;
list_all_deps([], Deps) ->
    Deps.
+
%% Filter the discovered applications down to actual RabbitMQ plugins
%% and their non-OTP dependencies.
remove_plugins(Plugins) ->
    %% We want to filter out all Erlang applications in the plugins
    %% directories which are not actual RabbitMQ plugin.
    %%
    %% A RabbitMQ plugin must depend on `rabbit`. We also want to keep
    %% all applications they depend on, except Erlang/OTP applications.
    %% In the end, we will skip:
    %% * Erlang/OTP applications
    %% * All applications which do not depend on `rabbit` and which
    %% are not direct or indirect dependencies of plugins.
    ActualPlugins = [Plugin
                     || #plugin{dependencies = Deps} = Plugin <- Plugins,
                        lists:member(rabbit, Deps)],
    %% As said above, we want to keep all non-plugins which are
    %% dependencies of plugins.
    PluginDeps = lists:usort(
                   lists:flatten(
                     [resolve_deps(Plugins, Plugin)
                      || Plugin <- ActualPlugins])),
    lists:filter(
      fun(#plugin{name = Name} = Plugin) ->
          IsOTPApp = is_plugin_provided_by_otp(Plugin),
          IsAPlugin =
              lists:member(Plugin, ActualPlugins) orelse
              lists:member(Name, PluginDeps),
          if
              IsOTPApp ->
                  rabbit_log:debug(
                    "Plugins discovery: "
                    "ignoring ~s, Erlang/OTP application",
                    [Name]);
              not IsAPlugin ->
                  rabbit_log:debug(
                    "Plugins discovery: "
                    "ignoring ~s, not a RabbitMQ plugin",
                    [Name]);
              true ->
                  ok
          end,
          %% keep only non-OTP applications that are plugins (or deps)
          not (IsOTPApp orelse not IsAPlugin)
      end, Plugins).

%% Transitive dependency names of a plugin. The result may contain
%% nested lists and duplicates; callers flatten and usort it.
resolve_deps(Plugins, #plugin{dependencies = Deps}) ->
    IndirectDeps = [case lists:keyfind(Dep, #plugin.name, Plugins) of
                        false -> [];
                        DepPlugin -> resolve_deps(Plugins, DepPlugin)
                    end
                    || Dep <- Deps],
    Deps ++ IndirectDeps.

%% Report metadata-reading problems on stderr; silent when none.
maybe_report_plugin_loading_problems([]) ->
    ok;
maybe_report_plugin_loading_problems(Problems) ->
    io:format(standard_error,
              "Problem reading some plugins: ~p~n",
              [Problems]).
diff --git a/deps/rabbit/src/rabbit_policies.erl b/deps/rabbit/src/rabbit_policies.erl
new file mode 100644
index 0000000000..54e4d2c03e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_policies.erl
@@ -0,0 +1,179 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_policies).
+
+%% Provides built-in policy parameter
+%% validation functions.
+
+-behaviour(rabbit_policy_validator).
+-behaviour(rabbit_policy_merge_strategy).
+
+-include("rabbit.hrl").
+
+-export([register/0, validate_policy/1, merge_policy_value/3]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "internal policies"},
+ {mfa, {rabbit_policies, register, []}},
+ {requires, rabbit_registry},
+ {enables, recovery}]}).
+
%% Boot step: register the built-in policy validators, operator policy
%% validators and merge strategies with rabbit_registry.
register() ->
    %% Note: there are more validators registered from other modules,
    %% such as rabbit_mirror_queue_misc
    [rabbit_registry:register(Class, Name, ?MODULE) ||
        {Class, Name} <- [{policy_validator, <<"alternate-exchange">>},
                          {policy_validator, <<"dead-letter-exchange">>},
                          {policy_validator, <<"dead-letter-routing-key">>},
                          {policy_validator, <<"message-ttl">>},
                          {policy_validator, <<"expires">>},
                          {policy_validator, <<"max-length">>},
                          {policy_validator, <<"max-length-bytes">>},
                          {policy_validator, <<"max-in-memory-length">>},
                          {policy_validator, <<"max-in-memory-bytes">>},
                          {policy_validator, <<"queue-mode">>},
                          {policy_validator, <<"overflow">>},
                          {policy_validator, <<"delivery-limit">>},
                          {policy_validator, <<"max-age">>},
                          {policy_validator, <<"max-segment-size">>},
                          {policy_validator, <<"queue-leader-locator">>},
                          {policy_validator, <<"initial-cluster-size">>},
                          {operator_policy_validator, <<"expires">>},
                          {operator_policy_validator, <<"message-ttl">>},
                          {operator_policy_validator, <<"max-length">>},
                          {operator_policy_validator, <<"max-length-bytes">>},
                          {operator_policy_validator, <<"max-in-memory-length">>},
                          {operator_policy_validator, <<"max-in-memory-bytes">>},
                          {operator_policy_validator, <<"delivery-limit">>},
                          {policy_merge_strategy, <<"expires">>},
                          {policy_merge_strategy, <<"message-ttl">>},
                          {policy_merge_strategy, <<"max-length">>},
                          {policy_merge_strategy, <<"max-length-bytes">>},
                          {policy_merge_strategy, <<"max-in-memory-length">>},
                          {policy_merge_strategy, <<"max-in-memory-bytes">>},
                          {policy_merge_strategy, <<"delivery-limit">>}]],
    ok.
+
-spec validate_policy([{binary(), term()}]) -> rabbit_policy_validator:validate_results().

%% Validate every key/value pair of a policy definition. The first
%% failure wins: once an error appears it is carried through the fold
%% and later keys are not validated.
validate_policy(Terms) ->
    Step = fun({Key, Value}, ok) -> validate_policy0(Key, Value);
              (_Term, Error)     -> Error
           end,
    lists:foldl(Step, ok, Terms).
+
+validate_policy0(<<"alternate-exchange">>, Value)
+ when is_binary(Value) ->
+ ok;
+validate_policy0(<<"alternate-exchange">>, Value) ->
+ {error, "~p is not a valid alternate exchange name", [Value]};
+
+validate_policy0(<<"dead-letter-exchange">>, Value)
+ when is_binary(Value) ->
+ ok;
+validate_policy0(<<"dead-letter-exchange">>, Value) ->
+ {error, "~p is not a valid dead letter exchange name", [Value]};
+
+validate_policy0(<<"dead-letter-routing-key">>, Value)
+ when is_binary(Value) ->
+ ok;
+validate_policy0(<<"dead-letter-routing-key">>, Value) ->
+ {error, "~p is not a valid dead letter routing key", [Value]};
+
+validate_policy0(<<"message-ttl">>, Value)
+ when is_integer(Value), Value >= 0 ->
+ ok;
+validate_policy0(<<"message-ttl">>, Value) ->
+ {error, "~p is not a valid message TTL", [Value]};
+
+validate_policy0(<<"expires">>, Value)
+ when is_integer(Value), Value >= 1 ->
+ ok;
+validate_policy0(<<"expires">>, Value) ->
+ {error, "~p is not a valid queue expiry", [Value]};
+
+validate_policy0(<<"max-length">>, Value)
+ when is_integer(Value), Value >= 0 ->
+ ok;
+validate_policy0(<<"max-length">>, Value) ->
+ {error, "~p is not a valid maximum length", [Value]};
+
+validate_policy0(<<"max-length-bytes">>, Value)
+ when is_integer(Value), Value >= 0 ->
+ ok;
+validate_policy0(<<"max-length-bytes">>, Value) ->
+ {error, "~p is not a valid maximum length in bytes", [Value]};
+
+validate_policy0(<<"max-in-memory-length">>, Value)
+ when is_integer(Value), Value >= 0 ->
+ ok;
+validate_policy0(<<"max-in-memory-length">>, Value) ->
+ {error, "~p is not a valid maximum memory in bytes", [Value]};
+
+validate_policy0(<<"max-in-memory-bytes">>, Value)
+ when is_integer(Value), Value >= 0 ->
+ ok;
+validate_policy0(<<"max-in-memory-bytes">>, Value) ->
+ {error, "~p is not a valid maximum memory in bytes", [Value]};
+
+validate_policy0(<<"queue-mode">>, <<"default">>) ->
+ ok;
+validate_policy0(<<"queue-mode">>, <<"lazy">>) ->
+ ok;
+validate_policy0(<<"queue-mode">>, Value) ->
+ {error, "~p is not a valid queue-mode value", [Value]};
+validate_policy0(<<"overflow">>, <<"drop-head">>) ->
+ ok;
+validate_policy0(<<"overflow">>, <<"reject-publish">>) ->
+ ok;
+validate_policy0(<<"overflow">>, <<"reject-publish-dlx">>) ->
+ ok;
+validate_policy0(<<"overflow">>, Value) ->
+ {error, "~p is not a valid overflow value", [Value]};
+
+validate_policy0(<<"delivery-limit">>, Value)
+ when is_integer(Value), Value >= 0 ->
+ ok;
+validate_policy0(<<"delivery-limit">>, Value) ->
+ {error, "~p is not a valid delivery limit", [Value]};
+
+validate_policy0(<<"max-age">>, Value) ->
+ case rabbit_amqqueue:check_max_age(Value) of
+ {error, _} ->
+ {error, "~p is not a valid max age", [Value]};
+ _ ->
+ ok
+ end;
+
+validate_policy0(<<"queue-leader-locator">>, <<"client-local">>) ->
+ ok;
+validate_policy0(<<"queue-leader-locator">>, <<"random">>) ->
+ ok;
+validate_policy0(<<"queue-leader-locator">>, <<"least-leaders">>) ->
+ ok;
+validate_policy0(<<"queue-leader-locator">>, Value) ->
+ {error, "~p is not a valid queue leader locator value", [Value]};
+
+validate_policy0(<<"initial-cluster-size">>, Value)
+ when is_integer(Value), Value >= 0 ->
+ ok;
+validate_policy0(<<"initial-cluster-size">>, Value) ->
+ {error, "~p is not a valid cluster size", [Value]};
+
+validate_policy0(<<"max-segment-size">>, Value)
+ when is_integer(Value), Value >= 0 ->
+ ok;
+validate_policy0(<<"max-segment-size">>, Value) ->
+ {error, "~p is not a valid segment size", [Value]}.
+
+merge_policy_value(<<"message-ttl">>, Val, OpVal) -> min(Val, OpVal);
+merge_policy_value(<<"max-length">>, Val, OpVal) -> min(Val, OpVal);
+merge_policy_value(<<"max-length-bytes">>, Val, OpVal) -> min(Val, OpVal);
+merge_policy_value(<<"max-in-memory-length">>, Val, OpVal) -> min(Val, OpVal);
+merge_policy_value(<<"max-in-memory-bytes">>, Val, OpVal) -> min(Val, OpVal);
+merge_policy_value(<<"expires">>, Val, OpVal) -> min(Val, OpVal);
+merge_policy_value(<<"delivery-limit">>, Val, OpVal) -> min(Val, OpVal).
diff --git a/deps/rabbit/src/rabbit_policy.erl b/deps/rabbit/src/rabbit_policy.erl
new file mode 100644
index 0000000000..44807de97d
--- /dev/null
+++ b/deps/rabbit/src/rabbit_policy.erl
@@ -0,0 +1,557 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_policy).
+
+%% Policies is a way to apply optional arguments ("x-args")
+%% to exchanges and queues in bulk, using name matching.
+%%
+%% Only one policy can apply to a given queue or exchange
+%% at a time. Priorities help determine what policy should
+%% take precedence.
+%%
+%% Policies build on runtime parameters. Policy-driven parameters
+%% are well known and therefore validated.
+%%
+%% See also:
+%%
+%% * rabbit_runtime_parameters
+%% * rabbit_policies
+%% * rabbit_registry
+
+%% TODO specs
+
+-behaviour(rabbit_runtime_parameter).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-import(rabbit_misc, [pget/2, pget/3]).
+
+-export([register/0]).
+-export([invalidate/0, recover/0]).
+-export([name/1, name_op/1, effective_definition/1, merge_operator_definitions/2, get/2, get_arg/3, set/1]).
+-export([validate/5, notify/5, notify_clear/4]).
+-export([parse_set/7, set/7, delete/3, lookup/2, list/0, list/1,
+ list_formatted/1, list_formatted/3, info_keys/0]).
+-export([parse_set_op/7, set_op/7, delete_op/3, lookup_op/2, list_op/0, list_op/1,
+ list_formatted_op/1, list_formatted_op/3]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "policy parameters"},
+ {mfa, {rabbit_policy, register, []}},
+ {requires, rabbit_registry},
+ {enables, recovery}]}).
+
%% Register this module as the runtime-parameter handler for both
%% regular policies and operator policies.
register() ->
    rabbit_registry:register(runtime_parameter, <<"policy">>, ?MODULE),
    rabbit_registry:register(runtime_parameter, <<"operator_policy">>, ?MODULE).
+
%% Name of the regular policy applied to a queue or exchange, or
%% `none' when no policy applies.
name(Q) when ?is_amqqueue(Q) ->
    Policy = amqqueue:get_policy(Q),
    name0(Policy);
name(#exchange{policy = Policy}) -> name0(Policy).

%% Name of the operator policy applied to a queue or exchange, or `none'.
name_op(Q) when ?is_amqqueue(Q) ->
    OpPolicy = amqqueue:get_operator_policy(Q),
    name0(OpPolicy);
name_op(#exchange{operator_policy = Policy}) -> name0(Policy).

name0(undefined) -> none;
name0(Policy) -> pget(name, Policy).

%% Effective definition of a queue or exchange: its regular policy
%% merged with the operator policy.
effective_definition(Q) when ?is_amqqueue(Q) ->
    Policy = amqqueue:get_policy(Q),
    OpPolicy = amqqueue:get_operator_policy(Q),
    merge_operator_definitions(Policy, OpPolicy);
effective_definition(#exchange{policy = Policy, operator_policy = OpPolicy}) ->
    merge_operator_definitions(Policy, OpPolicy).

%% Merge a policy definition with an operator policy definition.
%% Keys present in both are combined via merge_policy_value/3.
merge_operator_definitions(undefined, undefined) -> undefined;
merge_operator_definitions(Policy, undefined) -> pget(definition, Policy);
merge_operator_definitions(undefined, OpPolicy) -> pget(definition, OpPolicy);
merge_operator_definitions(Policy, OpPolicy) ->
    OpDefinition = rabbit_data_coercion:to_map(pget(definition, OpPolicy, [])),
    Definition = rabbit_data_coercion:to_map(pget(definition, Policy, [])),
    Keys = maps:keys(Definition),
    OpKeys = maps:keys(OpDefinition),
    %% umerge of the (sorted) key sets yields each key exactly once
    lists:map(fun(Key) ->
                  case {maps:get(Key, Definition, undefined), maps:get(Key, OpDefinition, undefined)} of
                      {Val, undefined} -> {Key, Val};
                      {undefined, OpVal} -> {Key, OpVal};
                      {Val, OpVal} -> {Key, merge_policy_value(Key, Val, OpVal)}
                  end
              end,
              lists:umerge(Keys, OpKeys)).
+
%% Recompute and attach the matching policy and operator policy to the
%% given queue or exchange record.
set(Q0) when ?is_amqqueue(Q0) ->
    Name = amqqueue:get_name(Q0),
    Policy = match(Name),
    OpPolicy = match_op(Name),
    Q1 = amqqueue:set_policy(Q0, Policy),
    Q2 = amqqueue:set_operator_policy(Q1, OpPolicy),
    Q2;
set(X = #exchange{name = Name}) ->
    X#exchange{policy = match(Name), operator_policy = match_op(Name)}.

%% Find the regular policy (if any) matching the given resource name.
match(Name = #resource{virtual_host = VHost}) ->
    match(Name, list(VHost)).

%% Find the operator policy (if any) matching the given resource name.
match_op(Name = #resource{virtual_host = VHost}) ->
    match(Name, list_op(VHost)).
+
%% Look up the effective value of policy key Name for a queue, an
%% exchange, or (slowly) a bare resource name.
get(Name, Q) when ?is_amqqueue(Q) ->
    Policy = amqqueue:get_policy(Q),
    OpPolicy = amqqueue:get_operator_policy(Q),
    get0(Name, Policy, OpPolicy);
get(Name, #exchange{policy = Policy, operator_policy = OpPolicy}) ->
    get0(Name, Policy, OpPolicy);

%% Caution - SLOW.
get(Name, EntityName = #resource{virtual_host = VHost}) ->
    get0(Name,
         match(EntityName, list(VHost)),
         match(EntityName, list_op(VHost))).

%% Resolve one key against a policy/operator-policy pair; when both
%% define it, the values are combined via the merge strategy.
get0(_Name, undefined, undefined) -> undefined;
get0(Name, undefined, OpPolicy) -> pget(Name, pget(definition, OpPolicy, []));
get0(Name, Policy, undefined) -> pget(Name, pget(definition, Policy, []));
get0(Name, Policy, OpPolicy) ->
    OpDefinition = pget(definition, OpPolicy, []),
    Definition = pget(definition, Policy, []),
    case {pget(Name, Definition), pget(Name, OpDefinition)} of
        {undefined, undefined} -> undefined;
        {Val, undefined} -> Val;
        {undefined, Val} -> Val;
        {Val, OpVal} -> merge_policy_value(Name, Val, OpVal)
    end.

%% Combine a policy value with an operator policy value, using the
%% merge strategy registered for the key; defaults to rabbit_policies.
merge_policy_value(Name, PolicyVal, OpVal) ->
    case policy_merge_strategy(Name) of
        {ok, Module} -> Module:merge_policy_value(Name, PolicyVal, OpVal);
        {error, not_found} -> rabbit_policies:merge_policy_value(Name, PolicyVal, OpVal)
    end.

%% Look up the module registered to merge values for the given key.
policy_merge_strategy(Name) ->
    case rabbit_registry:binary_to_type(rabbit_data_coercion:to_binary(Name)) of
        {error, not_found} ->
            {error, not_found};
        T ->
            rabbit_registry:lookup_module(policy_merge_strategy, T)
    end.

%% Resolve an exchange argument: the exchange's own "x-" argument wins
%% over the policy-provided value.
%% Many heads for optimisation
get_arg(_AName, _PName, #exchange{arguments = [], policy = undefined}) ->
    undefined;
get_arg(_AName, PName, X = #exchange{arguments = []}) ->
    get(PName, X);
get_arg(AName, PName, X = #exchange{arguments = Args}) ->
    case rabbit_misc:table_lookup(Args, AName) of
        undefined -> get(PName, X);
        {_Type, Arg} -> Arg
    end.
+
+%%----------------------------------------------------------------------------
+
%% Gets called during upgrades - therefore must not assume anything about the
%% state of Mnesia
%% Drop a marker file so the next recovery rewrites all policies.
invalidate() ->
    rabbit_file:write_file(invalid_file(), <<"">>).

%% If policies were invalidated, rewrite them and remove the marker.
recover() ->
    case rabbit_file:is_file(invalid_file()) of
        true -> recover0(),
                rabbit_file:delete(invalid_file());
        false -> ok
    end.

%% To get here we have to have just completed an Mnesia upgrade - i.e. we are
%% the first node starting. So we can rewrite the whole database. Note that
%% recovery has not yet happened; we must work with the rabbit_durable_<thing>
%% variants.
recover0() ->
    Xs = mnesia:dirty_match_object(rabbit_durable_exchange, #exchange{_ = '_'}),
    Qs = rabbit_amqqueue:list_with_possible_retry(
           fun() ->
               mnesia:dirty_match_object(
                 rabbit_durable_queue, amqqueue:pattern_match_all())
           end),
    Policies = list(),
    OpPolicies = list_op(),
    %% Recompute and persist the matching policies of every durable exchange.
    [rabbit_misc:execute_mnesia_transaction(
       fun () ->
           mnesia:write(
             rabbit_durable_exchange,
             rabbit_exchange_decorator:set(
               X#exchange{policy = match(Name, Policies),
                          operator_policy = match(Name, OpPolicies)}),
             write)
       end) || X = #exchange{name = Name} <- Xs],
    %% Same for durable queues, upgrading the record format and
    %% retrying if the first write fails on an old queue record.
    [begin
         QName = amqqueue:get_name(Q0),
         Policy1 = match(QName, Policies),
         Q1 = amqqueue:set_policy(Q0, Policy1),
         OpPolicy1 = match(QName, OpPolicies),
         Q2 = amqqueue:set_operator_policy(Q1, OpPolicy1),
         Q3 = rabbit_queue_decorator:set(Q2),
         ?try_mnesia_tx_or_upgrade_amqqueue_and_retry(
            rabbit_misc:execute_mnesia_transaction(
              fun () ->
                  mnesia:write(rabbit_durable_queue, Q3, write)
              end),
            begin
                Q4 = amqqueue:upgrade(Q3),
                rabbit_misc:execute_mnesia_transaction(
                  fun () ->
                      mnesia:write(rabbit_durable_queue, Q4, write)
                  end)
            end)
     end || Q0 <- Qs],
    ok.

%% Path of the marker file used by invalidate/0 and recover/0.
invalid_file() ->
    filename:join(rabbit_mnesia:dir(), "policies_are_invalid").
+
+%%----------------------------------------------------------------------------
+
%% Parse (JSON definition, string priority) and set an operator policy.
parse_set_op(VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
    parse_set(<<"operator_policy">>, VHost, Name, Pattern, Definition, Priority,
              ApplyTo, ActingUser).

%% Parse (JSON definition, string priority) and set a regular policy.
parse_set(VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
    parse_set(<<"policy">>, VHost, Name, Pattern, Definition, Priority, ApplyTo,
              ActingUser).

parse_set(Type, VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
    %% Priority may arrive as a string/binary from CLI or HTTP callers.
    try rabbit_data_coercion:to_integer(Priority) of
        Num -> parse_set0(Type, VHost, Name, Pattern, Definition, Num, ApplyTo,
                          ActingUser)
    catch
        error:badarg -> {error, "~p priority must be a number", [Priority]}
    end.

%% Decode the JSON definition and store the policy.
%% NOTE(review): the "Successfully set policy" line is logged before R
%% is inspected, i.e. even when set0/5 returned an error — confirm
%% this is intentional.
parse_set0(Type, VHost, Name, Pattern, Defn, Priority, ApplyTo, ActingUser) ->
    case rabbit_json:try_decode(Defn) of
        {ok, Term} ->
            R = set0(Type, VHost, Name,
                     [{<<"pattern">>, Pattern},
                      {<<"definition">>, maps:to_list(Term)},
                      {<<"priority">>, Priority},
                      {<<"apply-to">>, ApplyTo}],
                     ActingUser),
            rabbit_log:info("Successfully set policy '~s' matching ~s names in virtual host '~s' using pattern '~s'",
                            [Name, ApplyTo, VHost, Pattern]),
            R;
        {error, Reason} ->
            {error_string,
             rabbit_misc:format("JSON decoding error. Reason: ~ts", [Reason])}
    end.
+
%% Store an operator policy whose definition and priority are already parsed.
set_op(VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
    set(<<"operator_policy">>, VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser).

%% Store a regular policy whose definition and priority are already parsed.
set(VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
    set(<<"policy">>, VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser).

set(Type, VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
    %% defaults: priority 0, applies to all entity types
    PolicyProps = [{<<"pattern">>, Pattern},
                   {<<"definition">>, Definition},
                   {<<"priority">>, case Priority of
                                        undefined -> 0;
                                        _ -> Priority
                                    end},
                   {<<"apply-to">>, case ApplyTo of
                                        undefined -> <<"all">>;
                                        _ -> ApplyTo
                                    end}],
    set0(Type, VHost, Name, PolicyProps, ActingUser).

%% Persist a policy as a runtime parameter of the given type.
set0(Type, VHost, Name, Term, ActingUser) ->
    rabbit_runtime_parameters:set_any(VHost, Type, Name, Term, ActingUser).

%% Remove an operator policy.
delete_op(VHost, Name, ActingUser) ->
    rabbit_runtime_parameters:clear_any(VHost, <<"operator_policy">>, Name, ActingUser).

%% Remove a regular policy.
delete(VHost, Name, ActingUser) ->
    rabbit_runtime_parameters:clear_any(VHost, <<"policy">>, Name, ActingUser).
+
%% Look up one operator policy by name; not_found when absent.
lookup_op(VHost, Name) ->
    case rabbit_runtime_parameters:lookup(VHost, <<"operator_policy">>, Name) of
        not_found -> not_found;
        P -> p(P, fun ident/1)
    end.

%% Look up one regular policy by name; not_found when absent.
lookup(VHost, Name) ->
    case rabbit_runtime_parameters:lookup(VHost, <<"policy">>, Name) of
        not_found -> not_found;
        P -> p(P, fun ident/1)
    end.

%% List operator policies; the 0-arity form covers all virtual hosts.
list_op() ->
    list_op('_').

list_op(VHost) ->
    list_op(VHost, fun ident/1).

%% As list_op/1, with JSON-rendered definitions, ordered for display.
list_formatted_op(VHost) ->
    order_policies(list0_op(VHost, fun rabbit_json:encode/1)).

%% Streaming variant used by the CLI tooling.
list_formatted_op(VHost, Ref, AggregatorPid) ->
    rabbit_control_misc:emitting_map(AggregatorPid, Ref,
                                     fun(P) -> P end, list_formatted_op(VHost)).

list0_op(VHost, DefnFun) ->
    [p(P, DefnFun)
     || P <- rabbit_runtime_parameters:list(VHost, <<"operator_policy">>)].


%% List regular policies; the 0-arity form covers all virtual hosts.
list() ->
    list('_').

list(VHost) ->
    list0(VHost, fun ident/1).

%% As list/1, with JSON-rendered definitions, ordered for display.
list_formatted(VHost) ->
    order_policies(list0(VHost, fun rabbit_json:encode/1)).

%% Streaming variant used by the CLI tooling.
list_formatted(VHost, Ref, AggregatorPid) ->
    rabbit_control_misc:emitting_map(AggregatorPid, Ref,
                                     fun(P) -> P end, list_formatted(VHost)).

list0(VHost, DefnFun) ->
    [p(P, DefnFun) || P <- rabbit_runtime_parameters:list(VHost, <<"policy">>)].
+
%% Order policies for display.
%% NOTE(review): relies on sort_pred/2, defined elsewhere in this
%% module; negating it sorts in the opposite direction.
order_policies(PropList) ->
    lists:sort(fun (A, B) -> not sort_pred(A, B) end, PropList).

%% Convert a runtime-parameter proplist into the policy info-item
%% proplist, rendering the definition with DefnFun.
p(Parameter, DefnFun) ->
    Value = pget(value, Parameter),
    [{vhost, pget(vhost, Parameter)},
     {name, pget(name, Parameter)},
     {pattern, pget(<<"pattern">>, Value)},
     {'apply-to', pget(<<"apply-to">>, Value)},
     {definition, DefnFun(pget(<<"definition">>, Value))},
     {priority, pget(<<"priority">>, Value)}].

%% Identity; used where no definition formatting is wanted.
ident(X) -> X.

info_keys() -> [vhost, name, 'apply-to', pattern, definition, priority].
+
+%%----------------------------------------------------------------------------
+
+validate(_VHost, <<"policy">>, Name, Term, _User) ->
+ rabbit_parameter_validation:proplist(
+ Name, policy_validation(), Term);
+validate(_VHost, <<"operator_policy">>, Name, Term, _User) ->
+ rabbit_parameter_validation:proplist(
+ Name, operator_policy_validation(), Term).
+
+%% Runtime-parameter callback: a policy was set. Emit an event and
+%% re-apply policies to every queue/exchange in the vhost.
+notify(VHost, <<"policy">>, Name, Term, ActingUser) ->
+ rabbit_event:notify(policy_set, [{name, Name}, {vhost, VHost},
+ {user_who_performed_action, ActingUser} | Term]),
+ update_policies(VHost);
+%% NOTE(review): this clause also emits 'policy_set' for operator
+%% policies, whereas notify_clear/4 distinguishes
+%% 'operator_policy_cleared'. Looks inconsistent - confirm whether
+%% 'operator_policy_set' was intended before changing, since event
+%% consumers may already rely on the current name.
+notify(VHost, <<"operator_policy">>, Name, Term, ActingUser) ->
+ rabbit_event:notify(policy_set, [{name, Name}, {vhost, VHost},
+ {user_who_performed_action, ActingUser} | Term]),
+ update_policies(VHost).
+
+%% Runtime-parameter callback: a policy was cleared. Emit the
+%% kind-specific event and re-apply the remaining policies.
+notify_clear(VHost, <<"policy">>, Name, ActingUser) ->
+ rabbit_event:notify(policy_cleared, [{name, Name}, {vhost, VHost},
+ {user_who_performed_action, ActingUser}]),
+ update_policies(VHost);
+notify_clear(VHost, <<"operator_policy">>, Name, ActingUser) ->
+ rabbit_event:notify(operator_policy_cleared,
+ [{name, Name}, {vhost, VHost},
+ {user_who_performed_action, ActingUser}]),
+ update_policies(VHost).
+
+%%----------------------------------------------------------------------------
+
+%% Re-match every exchange and queue in VHost against the current
+%% policy and operator-policy sets inside a single mnesia
+%% transaction, then deliver policy_changed notifications afterwards.
+%% Notifications are wrapped in 'catch' so one failure cannot stop
+%% the rest.
+%%
+%% [1] We need to prevent this from becoming O(n^2) in a similar
+%% manner to rabbit_binding:remove_for_{source,destination}. So see
+%% the comment in rabbit_binding:lock_route_tables/0 for more rationale.
+%% [2] We could be here in a post-tx fun after the vhost has been
+%% deleted; in which case it's fine to do nothing.
+update_policies(VHost) ->
+ Tabs = [rabbit_queue, rabbit_durable_queue,
+ rabbit_exchange, rabbit_durable_exchange],
+ {Xs, Qs} = rabbit_misc:execute_mnesia_transaction(
+ fun() ->
+ [mnesia:lock({table, T}, write) || T <- Tabs], %% [1]
+ case catch {list(VHost), list_op(VHost)} of
+ {'EXIT', {throw, {error, {no_such_vhost, _}}}} ->
+ {[], []}; %% [2]
+ {'EXIT', Exit} ->
+ exit(Exit);
+ {Policies, OpPolicies} ->
+ {[update_exchange(X, Policies, OpPolicies) ||
+ X <- rabbit_exchange:list(VHost)],
+ [update_queue(Q, Policies, OpPolicies) ||
+ Q <- rabbit_amqqueue:list(VHost)]}
+ end
+ end),
+ [catch notify(X) || X <- Xs],
+ [catch notify(Q) || Q <- Qs],
+ ok.
+
+%% Recompute the effective policies of one exchange. Returns
+%% 'no_change' or an {OldExchange, NewExchange} pair for notify/1.
+%% Subtlety: OldPolicy/OldOpPolicy are already bound in the head, so
+%% the first case clause only matches when neither policy changed.
+update_exchange(X = #exchange{name = XName,
+ policy = OldPolicy,
+ operator_policy = OldOpPolicy},
+ Policies, OpPolicies) ->
+ case {match(XName, Policies), match(XName, OpPolicies)} of
+ {OldPolicy, OldOpPolicy} -> no_change;
+ {NewPolicy, NewOpPolicy} ->
+ NewExchange = rabbit_exchange:update(
+ XName,
+ fun(X0) ->
+ rabbit_exchange_decorator:set(
+ X0 #exchange{policy = NewPolicy,
+ operator_policy = NewOpPolicy})
+ end),
+ case NewExchange of
+ #exchange{} = X1 -> {X, X1};
+ %% Exchange vanished under us (e.g. concurrent
+ %% delete): report "no change" as {X, X}.
+ not_found -> {X, X }
+ end
+ end.
+
+%% Recompute the effective policies of one queue; the queue analogue
+%% of update_exchange/3. On change, both policies are replaced and
+%% the queue's policy version is incremented before the decorators
+%% are refreshed. The first case clause matches only when both
+%% matched policies equal the current ones (bound variables).
+update_queue(Q0, Policies, OpPolicies) when ?is_amqqueue(Q0) ->
+ QName = amqqueue:get_name(Q0),
+ OldPolicy = amqqueue:get_policy(Q0),
+ OldOpPolicy = amqqueue:get_operator_policy(Q0),
+ case {match(QName, Policies), match(QName, OpPolicies)} of
+ {OldPolicy, OldOpPolicy} -> no_change;
+ {NewPolicy, NewOpPolicy} ->
+ F = fun (QFun0) ->
+ QFun1 = amqqueue:set_policy(QFun0, NewPolicy),
+ QFun2 = amqqueue:set_operator_policy(QFun1, NewOpPolicy),
+ NewPolicyVersion = amqqueue:get_policy_version(QFun2) + 1,
+ QFun3 = amqqueue:set_policy_version(QFun2, NewPolicyVersion),
+ rabbit_queue_decorator:set(QFun3)
+ end,
+ NewQueue = rabbit_amqqueue:update(QName, F),
+ case NewQueue of
+ Q1 when ?is_amqqueue(Q1) ->
+ {Q0, Q1};
+ %% Queue vanished under us: report "no change".
+ not_found ->
+ {Q0, Q0}
+ end
+ end.
+
+%% Deliver one policy_changed notification for an update_exchange/3
+%% or update_queue/3 result; 'no_change' results are ignored.
+notify(no_change)->
+ ok;
+notify({X1 = #exchange{}, X2 = #exchange{}}) ->
+ rabbit_exchange:policy_changed(X1, X2);
+notify({Q1, Q2}) when ?is_amqqueue(Q1), ?is_amqqueue(Q2) ->
+ rabbit_amqqueue:policy_changed(Q1, Q2).
+
+%% Highest-priority policy matching Name, or 'undefined' if none.
+match(Name, Policies) ->
+ case match_all(Name, Policies) of
+ [] -> undefined;
+ [Policy | _] -> Policy
+ end.
+
+%% All policies matching Name, ordered by descending priority
+%% (sort_pred/2), so the head is the winner match/2 picks.
+match_all(Name, Policies) ->
+ lists:sort(fun sort_pred/2, [P || P <- Policies, matches(Name, P)]).
+
+%% A policy matches a resource when the resource kind is covered by
+%% 'apply-to', the definition is applicable to the resource, the
+%% pattern regex matches the resource name, and the vhosts are equal.
+matches(#resource{name = Name, kind = Kind, virtual_host = VHost} = Resource, Policy) ->
+ matches_type(Kind, pget('apply-to', Policy)) andalso
+ is_applicable(Resource, pget(definition, Policy)) andalso
+ match =:= re:run(Name, pget(pattern, Policy), [{capture, none}]) andalso
+ VHost =:= pget(vhost, Policy).
+
+%% 'apply-to' kind check. NOTE(review): a policy with no 'apply-to'
+%% stored yields pget -> undefined, which falls into the catch-all
+%% (false) and so never matches anything; confirm that a default
+%% (e.g. <<"all">>) is filled in upstream when the key is omitted.
+matches_type(exchange, <<"exchanges">>) -> true;
+matches_type(queue, <<"queues">>) -> true;
+matches_type(exchange, <<"all">>) -> true;
+matches_type(queue, <<"all">>) -> true;
+matches_type(_, _) -> false.
+
+%% Descending-priority ordering predicate for policy proplists.
+sort_pred(A, B) -> pget(priority, A) >= pget(priority, B).
+
+%% Queue policies get an extra applicability check against the queue;
+%% anything else is always applicable.
+is_applicable(#resource{kind = queue} = Resource, Policy) ->
+ rabbit_amqqueue:is_policy_applicable(Resource, to_list(Policy));
+is_applicable(_, _) ->
+ true.
+
+%% Accept definitions as either proplists or maps.
+to_list(L) when is_list(L) ->
+ L;
+to_list(M) when is_map(M) ->
+ maps:to_list(M).
+
+%%----------------------------------------------------------------------------
+
+%% Validation spec for operator-policy bodies; definitions are vetted
+%% by the validators registered under 'operator_policy_validator'.
+operator_policy_validation() ->
+ [{<<"priority">>, fun rabbit_parameter_validation:number/2, mandatory},
+ {<<"pattern">>, fun rabbit_parameter_validation:regex/2, mandatory},
+ {<<"apply-to">>, fun apply_to_validation/2, optional},
+ {<<"definition">>, fun validation_op/2, mandatory}].
+
+%% Validation spec for regular policy bodies.
+policy_validation() ->
+ [{<<"priority">>, fun rabbit_parameter_validation:number/2, mandatory},
+ {<<"pattern">>, fun rabbit_parameter_validation:regex/2, mandatory},
+ {<<"apply-to">>, fun apply_to_validation/2, optional},
+ {<<"definition">>, fun validation/2, mandatory}].
+
+%% Validate an operator-policy definition.
+validation_op(Name, Terms) ->
+ validation(Name, Terms, operator_policy_validator).
+
+%% Validate a regular policy definition.
+validation(Name, Terms) ->
+ validation(Name, Terms, policy_validator).
+
+%% Core definition validation: reject empty definitions, normalise
+%% maps to proplists, then (for proplists) reject duplicate keys and
+%% hand the terms to the registered validator modules via
+%% validation0/2. The final clause reports anything that is neither a
+%% list nor a map as a parse error.
+validation(_Name, [], _Validator) ->
+ {error, "no policy provided", []};
+validation(Name, Terms0, Validator) when is_map(Terms0) ->
+ Terms = maps:to_list(Terms0),
+ validation(Name, Terms, Validator);
+validation(_Name, Terms, Validator) when is_list(Terms) ->
+ %% {Keys, Modules} are parallel lists from the registry; each
+ %% registered key must be unique.
+ {Keys, Modules} = lists:unzip(
+ rabbit_registry:lookup_all(Validator)),
+ [] = dups(Keys), %% ASSERTION
+ %% {Module, KeyAsBinary} pairs consumed by validation0/2.
+ Validators = lists:zipwith(fun (M, K) -> {M, a2b(K)} end, Modules, Keys),
+ case is_proplist(Terms) of
+ true -> {TermKeys, _} = lists:unzip(Terms),
+ case dups(TermKeys) of
+ [] -> validation0(Validators, Terms);
+ Dup -> {error, "~p duplicate keys not allowed", [Dup]}
+ end;
+ false -> {error, "definition must be a dictionary: ~p", [Terms]}
+ end;
+validation(Name, Term, Validator) ->
+ {error, "parse error while reading policy ~s: ~p. Validator: ~p.",
+ [Name, Term, Validator]}.
+
+%% Fold over the registered validator modules, letting each one
+%% validate the subset of terms whose keys it owns and removing those
+%% terms from the remainder. The second fun clause short-circuits:
+%% once a validator returns an error it is carried through unchanged.
+%% Terms still left at the end belong to no validator and are
+%% reported as unrecognised settings.
+validation0(Validators, Terms) ->
+ case lists:foldl(
+ fun (Mod, {ok, TermsLeft}) ->
+ ModKeys = proplists:get_all_values(Mod, Validators),
+ case [T || {Key, _} = T <- TermsLeft,
+ lists:member(Key, ModKeys)] of
+ [] -> {ok, TermsLeft};
+ Scope -> {Mod:validate_policy(Scope), TermsLeft -- Scope}
+ end;
+ (_, Acc) ->
+ Acc
+ end, {ok, Terms}, proplists:get_keys(Validators)) of
+ {ok, []} ->
+ ok;
+ {ok, Unvalidated} ->
+ {error, "~p are not recognised policy settings", [Unvalidated]};
+ {Error, _} ->
+ Error
+ end.
+
+%% Atom -> binary (validator keys are registered as atoms).
+a2b(A) -> list_to_binary(atom_to_list(A)).
+
+%% Elements of L that occur more than once ([] if none).
+dups(L) -> L -- lists:usort(L).
+
+%% True when every element of L is a 2-tuple.
+is_proplist(L) -> length(L) =:= length([I || I = {_, _} <- L]).
+
+%% 'apply-to' must be one of the three recognised targets.
+apply_to_validation(_Name, <<"all">>) -> ok;
+apply_to_validation(_Name, <<"exchanges">>) -> ok;
+apply_to_validation(_Name, <<"queues">>) -> ok;
+apply_to_validation(_Name, Term) ->
+ {error, "apply-to '~s' unrecognised; should be 'queues', 'exchanges' "
+ "or 'all'", [Term]}.
diff --git a/deps/rabbit/src/rabbit_policy_merge_strategy.erl b/deps/rabbit/src/rabbit_policy_merge_strategy.erl
new file mode 100644
index 0000000000..f2b79e5862
--- /dev/null
+++ b/deps/rabbit/src/rabbit_policy_merge_strategy.erl
@@ -0,0 +1,19 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Behaviour for modules that merge a pair of policy values into one
+%% effective value (presumably the regular vs. operator value for a
+%% given policy key - confirm against implementing modules).
+-module(rabbit_policy_merge_strategy).
+
+-behaviour(rabbit_registry_class).
+
+-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).
+
+%% First argument appears to be the policy key name (binary); the two
+%% Value arguments share a type and the merged result has it too.
+-callback merge_policy_value(binary(), Value, Value) ->
+ Value
+ when Value :: term().
+
+%% rabbit_registry_class callbacks: nothing to do on (de)registration.
+added_to_rabbit_registry(_Type, _ModuleName) -> ok.
+removed_from_rabbit_registry(_Type) -> ok.
diff --git a/deps/rabbit/src/rabbit_prelaunch_cluster.erl b/deps/rabbit/src/rabbit_prelaunch_cluster.erl
new file mode 100644
index 0000000000..9d3cda99e3
--- /dev/null
+++ b/deps/rabbit/src/rabbit_prelaunch_cluster.erl
@@ -0,0 +1,22 @@
+%% Prelaunch step: prepare the cluster status files, run the Mnesia
+%% schema upgrade on the first pass only, then verify this node is
+%% consistent with the rest of the cluster.
+-module(rabbit_prelaunch_cluster).
+
+-export([setup/1]).
+
+%% Context is the prelaunch context map; only the 'initial_pass' flag
+%% is inspected here.
+setup(Context) ->
+ rabbit_log_prelaunch:debug(""),
+ rabbit_log_prelaunch:debug("== Clustering =="),
+ rabbit_log_prelaunch:debug("Preparing cluster status files"),
+ rabbit_node_monitor:prepare_cluster_status_files(),
+ case Context of
+ #{initial_pass := true} ->
+ rabbit_log_prelaunch:debug("Upgrading Mnesia schema"),
+ ok = rabbit_upgrade:maybe_upgrade_mnesia();
+ _ ->
+ ok
+ end,
+ %% It's important that the consistency check happens after
+ %% the upgrade, since if we are a secondary node the
+ %% primary node will have forgotten us
+ rabbit_log_prelaunch:debug("Checking cluster consistency"),
+ rabbit_mnesia:check_cluster_consistency(),
+ ok.
diff --git a/deps/rabbit/src/rabbit_prelaunch_enabled_plugins_file.erl b/deps/rabbit/src/rabbit_prelaunch_enabled_plugins_file.erl
new file mode 100644
index 0000000000..57fe32f8e6
--- /dev/null
+++ b/deps/rabbit/src/rabbit_prelaunch_enabled_plugins_file.erl
@@ -0,0 +1,53 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Prelaunch step: (re)write the `enabled_plugins` file from the
+%% plugin selection carried in the prelaunch context.
+-module(rabbit_prelaunch_enabled_plugins_file).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([setup/1]).
+
+setup(Context) ->
+ rabbit_log_prelaunch:debug(""),
+ rabbit_log_prelaunch:debug("== Enabled plugins file =="),
+ update_enabled_plugins_file(Context).
+
+%% -------------------------------------------------------------------
+%% `enabled_plugins` file content initialization.
+%% -------------------------------------------------------------------
+
+%% 'undefined': no selection given, leave the existing file alone.
+update_enabled_plugins_file(#{enabled_plugins := undefined}) ->
+ ok;
+%% 'all': expand to every plugin discovered on the plugins path.
+update_enabled_plugins_file(#{enabled_plugins := all,
+ plugins_path := Path} = Context) ->
+ List = [P#plugin.name || P <- rabbit_plugins:list(Path)],
+ do_update_enabled_plugins_file(Context, List);
+%% Explicit list of plugin names.
+update_enabled_plugins_file(#{enabled_plugins := List} = Context) ->
+ do_update_enabled_plugins_file(Context, List).
+
+%% Write the sorted, deduplicated plugin list to the enabled-plugins
+%% file as a single Erlang term; a write failure aborts prelaunch.
+do_update_enabled_plugins_file(#{enabled_plugins_file := File}, List) ->
+ SortedList = lists:usort(List),
+ case SortedList of
+ [] ->
+ rabbit_log_prelaunch:debug("Marking all plugins as disabled");
+ _ ->
+ rabbit_log_prelaunch:debug(
+ "Marking the following plugins as enabled:"),
+ [rabbit_log_prelaunch:debug(" - ~s", [P]) || P <- SortedList]
+ end,
+ Content = io_lib:format("~p.~n", [SortedList]),
+ case file:write_file(File, Content) of
+ ok ->
+ rabbit_log_prelaunch:debug("Wrote plugins file: ~ts", [File]),
+ ok;
+ {error, Reason} ->
+ rabbit_log_prelaunch:error(
+ "Failed to update enabled plugins file \"~ts\" "
+ "from $RABBITMQ_ENABLED_PLUGINS: ~ts",
+ [File, file:format_error(Reason)]),
+ throw({error, failed_to_update_enabled_plugins_file})
+ end.
diff --git a/deps/rabbit/src/rabbit_prelaunch_feature_flags.erl b/deps/rabbit/src/rabbit_prelaunch_feature_flags.erl
new file mode 100644
index 0000000000..cd7b276f4c
--- /dev/null
+++ b/deps/rabbit/src/rabbit_prelaunch_feature_flags.erl
@@ -0,0 +1,32 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Prelaunch step: make sure the feature-flags file's directory
+%% exists, then build the feature flags registry. Either failure
+%% aborts prelaunch via throw/1.
+-module(rabbit_prelaunch_feature_flags).
+
+-export([setup/1]).
+
+setup(#{feature_flags_file := FFFile}) ->
+ rabbit_log_prelaunch:debug(""),
+ rabbit_log_prelaunch:debug("== Feature flags =="),
+ case filelib:ensure_dir(FFFile) of
+ ok ->
+ rabbit_log_prelaunch:debug("Initializing feature flags registry"),
+ case rabbit_feature_flags:initialize_registry() of
+ ok ->
+ ok;
+ {error, Reason} ->
+ rabbit_log_prelaunch:error(
+ "Failed to initialize feature flags registry: ~p",
+ [Reason]),
+ throw({error, failed_to_initialize_feature_flags_registry})
+ end;
+ {error, Reason} ->
+ rabbit_log_prelaunch:error(
+ "Failed to create feature flags file \"~ts\" directory: ~ts",
+ [FFFile, file:format_error(Reason)]),
+ throw({error, failed_to_create_feature_flags_file_directory})
+ end.
diff --git a/deps/rabbit/src/rabbit_prelaunch_logging.erl b/deps/rabbit/src/rabbit_prelaunch_logging.erl
new file mode 100644
index 0000000000..6e3f040ec5
--- /dev/null
+++ b/deps/rabbit/src/rabbit_prelaunch_logging.erl
@@ -0,0 +1,75 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Prelaunch step: point $ERL_CRASH_DUMP at the log directory and
+%% configure/start the lager-based logging stack.
+-module(rabbit_prelaunch_logging).
+
+-export([setup/1]).
+
+setup(Context) ->
+ rabbit_log_prelaunch:debug(""),
+ rabbit_log_prelaunch:debug("== Logging =="),
+ ok = set_ERL_CRASH_DUMP_envvar(Context),
+ ok = configure_lager(Context).
+
+%% Set $ERL_CRASH_DUMP to <log_base_dir>/erl_crash.dump, but only if
+%% the user has not already set it.
+set_ERL_CRASH_DUMP_envvar(#{log_base_dir := LogBaseDir}) ->
+ case os:getenv("ERL_CRASH_DUMP") of
+ false ->
+ ErlCrashDump = filename:join(LogBaseDir, "erl_crash.dump"),
+ rabbit_log_prelaunch:debug(
+ "Setting $ERL_CRASH_DUMP environment variable to \"~ts\"",
+ [ErlCrashDump]),
+ os:putenv("ERL_CRASH_DUMP", ErlCrashDump),
+ ok;
+ ErlCrashDump ->
+ rabbit_log_prelaunch:debug(
+ "$ERL_CRASH_DUMP environment variable already set to \"~ts\"",
+ [ErlCrashDump]),
+ ok
+ end.
+
+%% Translate the context's log-file settings into sasl/rabbit app env
+%% (a main log file of "-" means log to stdout), then start the
+%% logger and re-install early logging on top of it. Existing app env
+%% values are left untouched; only unset keys get these defaults.
+configure_lager(#{log_base_dir := LogBaseDir,
+ main_log_file := MainLog,
+ upgrade_log_file := UpgradeLog} = Context) ->
+ {SaslErrorLogger,
+ MainLagerHandler,
+ UpgradeLagerHandler} = case MainLog of
+ "-" ->
+ %% Log to STDOUT.
+ rabbit_log_prelaunch:debug(
+ "Logging to stdout"),
+ {tty,
+ tty,
+ tty};
+ _ ->
+ rabbit_log_prelaunch:debug(
+ "Logging to:"),
+ [rabbit_log_prelaunch:debug(
+ " - ~ts", [Log])
+ || Log <- [MainLog, UpgradeLog]],
+ %% Log to file.
+ {false,
+ MainLog,
+ UpgradeLog}
+ end,
+
+ ok = application:set_env(lager, crash_log, "log/crash.log"),
+
+ %% Set each variable only when currently unset, so explicit user
+ %% configuration wins over these computed defaults.
+ Fun = fun({App, Var, Value}) ->
+ case application:get_env(App, Var) of
+ undefined -> ok = application:set_env(App, Var, Value);
+ _ -> ok
+ end
+ end,
+ Vars = [{sasl, sasl_error_logger, SaslErrorLogger},
+ {rabbit, lager_log_root, LogBaseDir},
+ {rabbit, lager_default_file, MainLagerHandler},
+ {rabbit, lager_upgrade_file, UpgradeLagerHandler}],
+ lists:foreach(Fun, Vars),
+
+ ok = rabbit_lager:start_logger(),
+
+ ok = rabbit_prelaunch_early_logging:setup_early_logging(Context, false).
diff --git a/deps/rabbit/src/rabbit_prequeue.erl b/deps/rabbit/src/rabbit_prequeue.erl
new file mode 100644
index 0000000000..b5af8927c7
--- /dev/null
+++ b/deps/rabbit/src/rabbit_prequeue.erl
@@ -0,0 +1,100 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_prequeue).
+
+%% This is the initial gen_server that all queue processes start off
+%% as. It handles the decision as to whether we need to start a new
+%% mirror, a new master/unmirrored, or whether we are restarting (and
+%% if so, as what). Thus a crashing queue process can restart from here
+%% and always do the right thing.
+
+-export([start_link/3]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+-behaviour(gen_server2).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+%%----------------------------------------------------------------------------
+
+-export_type([start_mode/0]).
+
+-type start_mode() :: 'declare' | 'recovery' | 'slave'.
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(amqqueue:amqqueue(), start_mode(), pid())
+ -> rabbit_types:ok_pid_or_error().
+
+start_link(Q, StartMode, Marker) ->
+ gen_server2:start_link(?MODULE, {Q, StartMode, Marker}, []).
+
+%%----------------------------------------------------------------------------
+
+%% A live Marker pid means this is the first start requested by that
+%% process, so StartMode is honoured (slave vs. master); a dead
+%% Marker means the queue process crashed and we are restarting.
+init({Q, StartMode, Marker}) ->
+ init(Q, case {is_process_alive(Marker), StartMode} of
+ {true, slave} -> slave;
+ {true, _} -> master;
+ {false, _} -> restart
+ end).
+
+%% Hand straight over to the real queue implementation.
+init(Q, master) -> rabbit_amqqueue_process:init(Q);
+init(Q, slave) -> rabbit_mirror_queue_slave:init(Q);
+
+%% Restart: re-read the queue record and decide, from the liveness of
+%% the recorded master and mirrors, what role to assume (see the
+%% numbered notes below the function).
+init(Q0, restart) when ?is_amqqueue(Q0) ->
+ QueueName = amqqueue:get_name(Q0),
+ {ok, Q1} = rabbit_amqqueue:lookup(QueueName),
+ QPid = amqqueue:get_pid(Q1),
+ SPids = amqqueue:get_slave_pids(Q1),
+ LocalOrMasterDown = node(QPid) =:= node()
+ orelse not rabbit_mnesia:on_running_node(QPid),
+ Slaves = [SPid || SPid <- SPids, rabbit_mnesia:is_process_alive(SPid)],
+ case rabbit_mnesia:is_process_alive(QPid) of
+ true -> false = LocalOrMasterDown, %% assertion
+ rabbit_mirror_queue_slave:go(self(), async),
+ rabbit_mirror_queue_slave:init(Q1); %% [1]
+ false -> case LocalOrMasterDown andalso Slaves =:= [] of
+ true -> crash_restart(Q1); %% [2]
+ false -> timer:sleep(25),
+ init(Q1, restart) %% [3]
+ end
+ end.
+%% [1] There is a master on another node. Regardless of whether we
+%% were originally a master or a mirror, we are now a new slave.
+%%
+%% [2] Nothing is alive. We are the last best hope. Try to restart as a master.
+%%
+%% [3] The current master is dead but either there are alive mirrors to
+%% take over or it's all happening on a different node anyway. This is
+%% not a stable situation. Sleep and wait for somebody else to make a
+%% move.
+
+%% Become the new master for a crashed queue: claim the pid slot and
+%% initialise as a queue process. The self-cast of 'init' is
+%% presumably consumed by rabbit_amqqueue_process to finish recovery
+%% asynchronously - confirm against that module.
+crash_restart(Q0) when ?is_amqqueue(Q0) ->
+ QueueName = amqqueue:get_name(Q0),
+ rabbit_log:error("Restarting crashed ~s.~n", [rabbit_misc:rs(QueueName)]),
+ gen_server2:cast(self(), init),
+ Q1 = amqqueue:set_pid(Q0, self()),
+ rabbit_amqqueue_process:init(Q1).
+
+%%----------------------------------------------------------------------------
+
+%% This gen_server2 always hands over to some other module at the end
+%% of init/1.
+-spec handle_call(_, _, _) -> no_return().
+handle_call(_Msg, _From, _State) -> exit(unreachable).
+-spec handle_cast(_, _) -> no_return().
+handle_cast(_Msg, _State) -> exit(unreachable).
+-spec handle_info(_, _) -> no_return().
+handle_info(_Msg, _State) -> exit(unreachable).
+-spec terminate(_, _) -> no_return().
+terminate(_Reason, _State) -> exit(unreachable).
+-spec code_change(_, _, _) -> no_return().
+code_change(_OldVsn, _State, _Extra) -> exit(unreachable).
diff --git a/deps/rabbit/src/rabbit_priority_queue.erl b/deps/rabbit/src/rabbit_priority_queue.erl
new file mode 100644
index 0000000000..4b41b8dfbd
--- /dev/null
+++ b/deps/rabbit/src/rabbit_priority_queue.erl
@@ -0,0 +1,688 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2015-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_priority_queue).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include_lib("rabbit_common/include/rabbit_framing.hrl").
+-include("amqqueue.hrl").
+
+-behaviour(rabbit_backing_queue).
+
+%% enabled unconditionally. Disabling priority queuing after
+%% it has been enabled is dangerous.
+-rabbit_boot_step({?MODULE,
+ [{description, "enable priority queue"},
+ {mfa, {?MODULE, enable, []}},
+ {requires, pre_boot},
+ {enables, kernel_ready}]}).
+
+-export([enable/0]).
+
+-export([start/2, stop/1]).
+
+-export([init/3, terminate/2, delete_and_terminate/2, delete_crashed/1,
+ purge/1, purge_acks/1,
+ publish/6, publish_delivered/5, discard/4, drain_confirmed/1,
+ batch_publish/4, batch_publish_delivered/4,
+ dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2,
+ ackfold/4, fold/3, len/1, is_empty/1, depth/1,
+ set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1,
+ handle_pre_hibernate/1, resume/1, msg_rates/1,
+ info/2, invoke/3, is_duplicate/2, set_queue_mode/2,
+ zip_msgs_and_acks/4, handle_info/2]).
+
+%% State when priorities are in use: one {Priority, SubBQS} per
+%% priority in bqss (highest first; see init/3).
+-record(state, {bq, bqss, max_priority}).
+%% State when delegating 1:1 to the real backing queue module.
+-record(passthrough, {bq, bqs}).
+
+%% See 'note on suffixes' below
+-define(passthrough1(F), State#passthrough{bqs = BQ:F}).
+-define(passthrough2(F),
+ {Res, BQS1} = BQ:F, {Res, State#passthrough{bqs = BQS1}}).
+-define(passthrough3(F),
+ {Res1, Res2, BQS1} = BQ:F, {Res1, Res2, State#passthrough{bqs = BQS1}}).
+
+%% This module adds support for priority queues.
+%%
+%% Priority queues have one backing queue per priority. Backing queue functions
+%% then produce a list of results for each BQ and fold over them, sorting
+%% by priority.
+%%
+%% For queues that do not
+%% have priorities enabled, the functions in this module delegate to
+%% their "regular" backing queue module counterparts. See the `passthrough`
+%% record and passthrough{1,2,3} macros.
+%%
+%% Delivery to consumers happens by first "running" the queue with
+%% the highest priority until there are no more messages to deliver,
+%% then the next one, and so on. This offers good prioritisation
+%% but may result in lower priority messages not being delivered
+%% when there's a high ingress rate of messages with higher priority.
+
+%% Install this module as rabbit's backing_queue_module, stashing the
+%% real module in the rabbitmq_priority_queue app env (read back
+%% elsewhere in this module, e.g. via bq() in start/2). Idempotent:
+%% no-op if already installed.
+enable() ->
+ {ok, RealBQ} = application:get_env(rabbit, backing_queue_module),
+ case RealBQ of
+ ?MODULE -> ok;
+ _ -> rabbit_log:info("Priority queues enabled, real BQ is ~s~n",
+ [RealBQ]),
+ application:set_env(
+ rabbitmq_priority_queue, backing_queue_module, RealBQ),
+ application:set_env(rabbit, backing_queue_module, ?MODULE)
+ end.
+
+%%----------------------------------------------------------------------------
+
+%% Start the real BQ for VHost, expanding each priority queue name
+%% into one name per priority, then collapsing the recovery terms
+%% back into the per-queue shape callers expect.
+start(VHost, QNames) ->
+ BQ = bq(),
+ %% TODO this expand-collapse dance is a bit ridiculous but it's what
+ %% rabbit_amqqueue:recover/0 expects. We could probably simplify
+ %% this if we rejigged recovery a bit.
+ {DupNames, ExpNames} = expand_queues(QNames),
+ case BQ:start(VHost, ExpNames) of
+ {ok, ExpRecovery} ->
+ {ok, collapse_recovery(QNames, DupNames, ExpRecovery)};
+ Else ->
+ Else
+ end.
+
+%% Delegate stop to the real BQ.
+stop(VHost) ->
+ BQ = bq(),
+ BQ:stop(VHost).
+
+%%----------------------------------------------------------------------------
+
+%% Rewrite Q's resource name to the per-priority variant produced by
+%% mutate_name_bin/2.
+mutate_name(P, Q) when ?is_amqqueue(Q) ->
+ Res0 = #resource{name = QNameBin0} = amqqueue:get_name(Q),
+ QNameBin1 = mutate_name_bin(P, QNameBin0),
+ Res1 = Res0#resource{name = QNameBin1},
+ amqqueue:set_name(Q, Res1).
+
+%% Append a 0 byte plus the priority to the name (the 0 byte
+%% presumably keeps mutated names distinct from user-visible queue
+%% names - confirm).
+mutate_name_bin(P, NameBin) ->
+ <<NameBin/binary, 0, P:8>>.
+
+%% Expand every queue name into its per-priority names; returns the
+%% duplicated original names alongside, element-for-element.
+expand_queues(QNames) ->
+ lists:unzip(
+ lists:append([expand_queue(QName) || QName <- QNames])).
+
+%% One {Original, Expanded} pair per priority, or a single identity
+%% pair for non-priority queues; reads the durable queue record.
+expand_queue(QName = #resource{name = QNameBin}) ->
+ {ok, Q} = rabbit_misc:dirty_read({rabbit_durable_queue, QName}),
+ case priorities(Q) of
+ none -> [{QName, QName}];
+ Ps -> [{QName, QName#resource{name = mutate_name_bin(P, QNameBin)}}
+ || P <- Ps]
+ end.
+
+%% Regroup the per-priority recovery terms under their original queue
+%% names, preserving the order of QNames.
+collapse_recovery(QNames, DupNames, Recovery) ->
+ NameToTerms = lists:foldl(fun({Name, RecTerm}, Dict) ->
+ dict:append(Name, RecTerm, Dict)
+ end, dict:new(), lists:zip(DupNames, Recovery)),
+ [dict:fetch(Name, NameToTerms) || Name <- QNames].
+
+%% Priorities configured for Q: 'none' unless x-max-priority is set
+%% with an integer AMQP type; otherwise the list Max..0 (highest
+%% first), with Max capped at ?MAX_SUPPORTED_PRIORITY.
+priorities(Q) when ?is_amqqueue(Q) ->
+ Args = amqqueue:get_arguments(Q),
+ Ints = [long, short, signedint, byte, unsignedbyte, unsignedshort, unsignedint],
+ case rabbit_misc:table_lookup(Args, <<"x-max-priority">>) of
+ {Type, RequestedMax} ->
+ case lists:member(Type, Ints) of
+ false -> none;
+ true ->
+ Max = min(RequestedMax, ?MAX_SUPPORTED_PRIORITY),
+ lists:reverse(lists:seq(0, Max))
+ end;
+ _ -> none
+ end.
+
+%%----------------------------------------------------------------------------
+
+%% Build either a passthrough state (queue has no priorities) or a
+%% #state{} with one sub-BQS per priority. Each per-priority async
+%% callback is tagged with its priority so results can be routed back
+%% to the right sub-BQS.
+init(Q, Recover, AsyncCallback) ->
+ BQ = bq(),
+ case priorities(Q) of
+ none -> RealRecover = case Recover of
+ [R] -> R; %% [0]
+ R -> R
+ end,
+ #passthrough{bq = BQ,
+ bqs = BQ:init(Q, RealRecover, AsyncCallback)};
+ Ps -> Init = fun (P, Term) ->
+ BQ:init(
+ mutate_name(P, Q), Term,
+ fun (M, F) -> AsyncCallback(M, {P, F}) end)
+ end,
+ BQSs = case have_recovery_terms(Recover) of
+ false -> [{P, Init(P, Recover)} || P <- Ps];
+ _ -> PsTerms = lists:zip(Ps, Recover),
+ [{P, Init(P, Term)} || {P, Term} <- PsTerms]
+ end,
+ #state{bq = BQ,
+ bqss = BQSs,
+ max_priority = hd(Ps)}
+ end.
+%% [0] collapse_recovery has the effect of making a list of recovery
+%% terms in priority order, even for non priority queues. It's easier
+%% to do that and "unwrap" in init/3 than to have collapse_recovery be
+%% aware of non-priority queues.
+
+%% Per-priority recovery terms only exist for clean restarts.
+have_recovery_terms(new) -> false;
+have_recovery_terms(non_clean_shutdown) -> false;
+have_recovery_terms(_) -> true.
+
+%% From here on each operation has two clauses: a priority-mode
+%% clause that applies the real BQ's function to every sub-BQS (via
+%% the fold/foreach helpers defined later in this module), and a
+%% passthrough clause built on the ?passthrough macros.
+terminate(Reason, State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) -> BQ:terminate(Reason, BQSN) end, State);
+terminate(Reason, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(terminate(Reason, BQS)).
+
+delete_and_terminate(Reason, State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) ->
+ BQ:delete_and_terminate(Reason, BQSN)
+ end, State);
+delete_and_terminate(Reason, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(delete_and_terminate(Reason, BQS)).
+
+%% No state here: resolve priorities from the queue record itself and
+%% delete every per-priority incarnation.
+delete_crashed(Q) ->
+ BQ = bq(),
+ case priorities(Q) of
+ none -> BQ:delete_crashed(Q);
+ Ps -> [BQ:delete_crashed(mutate_name(P, Q)) || P <- Ps]
+ end.
+
+%% Purge every sub-BQS; the numeric results are combined by
+%% fold_add2 (defined later in this module).
+purge(State = #state{bq = BQ}) ->
+ fold_add2(fun (_P, BQSN) -> BQ:purge(BQSN) end, State);
+purge(State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(purge(BQS)).
+
+purge_acks(State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) -> BQ:purge_acks(BQSN) end, State);
+purge_acks(State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(purge_acks(BQS)).
+
+%% Route the publish to a single sub-BQS via pick1 (presumably chosen
+%% from the message's priority - pick1 is defined later in this
+%% module).
+publish(Msg, MsgProps, IsDelivered, ChPid, Flow, State = #state{bq = BQ}) ->
+ pick1(fun (_P, BQSN) ->
+ BQ:publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQSN)
+ end, Msg, State);
+publish(Msg, MsgProps, IsDelivered, ChPid, Flow,
+ State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQS)).
+
+%% Partition the batch by priority (capped at MaxP), then publish
+%% each partition into its sub-BQS.
+batch_publish(Publishes, ChPid, Flow, State = #state{bq = BQ, bqss = [{MaxP, _} |_]}) ->
+ PubMap = partition_publish_batch(Publishes, MaxP),
+ lists:foldl(
+ fun ({Priority, Pubs}, St) ->
+ pick1(fun (_P, BQSN) ->
+ BQ:batch_publish(Pubs, ChPid, Flow, BQSN)
+ end, Priority, St)
+ end, State, maps:to_list(PubMap));
+batch_publish(Publishes, ChPid, Flow,
+ State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(batch_publish(Publishes, ChPid, Flow, BQS)).
+
+%% As publish/6 but returns an acktag; the tag is wrapped as
+%% {Priority, AckTag} so ack/requeue can route it back.
+publish_delivered(Msg, MsgProps, ChPid, Flow, State = #state{bq = BQ}) ->
+ pick2(fun (P, BQSN) ->
+ {AckTag, BQSN1} = BQ:publish_delivered(
+ Msg, MsgProps, ChPid, Flow, BQSN),
+ {{P, AckTag}, BQSN1}
+ end, Msg, State);
+publish_delivered(Msg, MsgProps, ChPid, Flow,
+ State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(publish_delivered(Msg, MsgProps, ChPid, Flow, BQS)).
+
+%% Batch variant of publish_delivered/5; acktags are accumulated per
+%% priority partition and returned in submission order.
+batch_publish_delivered(Publishes, ChPid, Flow, State = #state{bq = BQ, bqss = [{MaxP, _} |_]}) ->
+ PubMap = partition_publish_delivered_batch(Publishes, MaxP),
+ {PrioritiesAndAcks, State1} =
+ lists:foldl(
+ fun ({Priority, Pubs}, {PriosAndAcks, St}) ->
+ {PriosAndAcks1, St1} =
+ pick2(fun (P, BQSN) ->
+ {AckTags, BQSN1} =
+ BQ:batch_publish_delivered(
+ Pubs, ChPid, Flow, BQSN),
+ {priority_on_acktags(P, AckTags), BQSN1}
+ end, Priority, St),
+ {[PriosAndAcks1 | PriosAndAcks], St1}
+ end, {[], State}, maps:to_list(PubMap)),
+ {lists:reverse(PrioritiesAndAcks), State1};
+batch_publish_delivered(Publishes, ChPid, Flow,
+ State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(batch_publish_delivered(Publishes, ChPid, Flow, BQS)).
+
+%% TODO this is a hack. The BQ api does not give us enough information
+%% here - if we had the Msg we could look at its priority and forward
+%% to the appropriate sub-BQ. But we don't so we are stuck.
+%%
+%% But fortunately VQ ignores discard/4, so we can too, *assuming we
+%% are talking to VQ*. discard/4 is used by HA, but that's "above" us
+%% (if in use) so we don't break that either, just some hypothetical
+%% alternate BQ implementation.
+discard(_MsgId, _ChPid, _Flow, State = #state{}) ->
+ State;
+ %% We should have something a bit like this here:
+ %% pick1(fun (_P, BQSN) ->
+ %% BQ:discard(MsgId, ChPid, Flow, BQSN)
+ %% end, Msg, State);
+discard(MsgId, ChPid, Flow, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(discard(MsgId, ChPid, Flow, BQS)).
+
+%% Collect confirmed message ids from every sub-BQS (appended via
+%% fold_append2, defined later in this module).
+drain_confirmed(State = #state{bq = BQ}) ->
+ fold_append2(fun (_P, BQSN) -> BQ:drain_confirmed(BQSN) end, State);
+drain_confirmed(State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(drain_confirmed(BQS)).
+
+%% Drop messages matching Pred, trying sub-BQSs in priority order via
+%% find2 ('undefined' when nothing was dropped anywhere).
+dropwhile(Pred, State = #state{bq = BQ}) ->
+ find2(fun (_P, BQSN) -> BQ:dropwhile(Pred, BQSN) end, undefined, State);
+dropwhile(Pred, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(dropwhile(Pred, BQS)).
+
+%% TODO this is a bit nasty. In the one place where fetchwhile/4 is
+%% actually used the accumulator is a list of acktags, which of course
+%% we need to mutate - so we do that although we are encoding an
+%% assumption here.
+fetchwhile(Pred, Fun, Acc, State = #state{bq = BQ}) ->
+ findfold3(
+ fun (P, BQSN, AccN) ->
+ {Res, AccN1, BQSN1} = BQ:fetchwhile(Pred, Fun, AccN, BQSN),
+ {Res, priority_on_acktags(P, AccN1), BQSN1}
+ end, Acc, undefined, State);
+fetchwhile(Pred, Fun, Acc, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough3(fetchwhile(Pred, Fun, Acc, BQS)).
+
+%% Fetch from the highest-priority non-empty sub-BQS; the returned
+%% acktag is wrapped as {Priority, AckTag}.
+fetch(AckRequired, State = #state{bq = BQ}) ->
+ find2(
+ fun (P, BQSN) ->
+ case BQ:fetch(AckRequired, BQSN) of
+ {empty, BQSN1} -> {empty, BQSN1};
+ {{Msg, Del, ATag}, BQSN1} -> {{Msg, Del, {P, ATag}}, BQSN1}
+ end
+ end, empty, State);
+fetch(AckRequired, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(fetch(AckRequired, BQS)).
+
+%% As fetch/2 but drops the message, returning only id and wrapped
+%% acktag.
+drop(AckRequired, State = #state{bq = BQ}) ->
+ find2(fun (P, BQSN) ->
+ case BQ:drop(AckRequired, BQSN) of
+ {empty, BQSN1} -> {empty, BQSN1};
+ {{MsgId, AckTag}, BQSN1} -> {{MsgId, {P, AckTag}}, BQSN1}
+ end
+ end, empty, State);
+drop(AckRequired, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(drop(AckRequired, BQS)).
+
+%% Unwrap {Priority, AckTag} tags and ack each group against its own
+%% sub-BQS (fold_by_acktags2 is defined later in this module).
+ack(AckTags, State = #state{bq = BQ}) ->
+ fold_by_acktags2(fun (AckTagsN, BQSN) ->
+ BQ:ack(AckTagsN, BQSN)
+ end, AckTags, State);
+ack(AckTags, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(ack(AckTags, BQS)).
+
+%% Same routing as ack/2, but requeueing.
+requeue(AckTags, State = #state{bq = BQ}) ->
+ fold_by_acktags2(fun (AckTagsN, BQSN) ->
+ BQ:requeue(AckTagsN, BQSN)
+ end, AckTags, State);
+requeue(AckTags, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(requeue(AckTags, BQS)).
+
+%% Similar problem to fetchwhile/4
+ackfold(MsgFun, Acc, State = #state{bq = BQ}, AckTags) ->
+ AckTagsByPriority = partition_acktags(AckTags),
+ fold2(
+ fun (P, BQSN, AccN) ->
+ case maps:find(P, AckTagsByPriority) of
+ {ok, ATagsN} -> {AccN1, BQSN1} =
+ BQ:ackfold(MsgFun, AccN, BQSN, ATagsN),
+ {priority_on_acktags(P, AccN1), BQSN1};
+ error -> {AccN, BQSN}
+ end
+ end, Acc, State);
+ackfold(MsgFun, Acc, State = #passthrough{bq = BQ, bqs = BQS}, AckTags) ->
+ ?passthrough2(ackfold(MsgFun, Acc, BQS, AckTags)).
+
+%% Fold Fun over the messages of every sub-BQS, threading Acc.
+fold(Fun, Acc, State = #state{bq = BQ}) ->
+ fold2(fun (_P, BQSN, AccN) -> BQ:fold(Fun, AccN, BQSN) end, Acc, State);
+fold(Fun, Acc, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(fold(Fun, Acc, BQS)).
+
+%% Queue length: sum of the sub-BQS lengths.
+len(#state{bq = BQ, bqss = BQSs}) ->
+ add0(fun (_P, BQSN) -> BQ:len(BQSN) end, BQSs);
+len(#passthrough{bq = BQ, bqs = BQS}) ->
+ BQ:len(BQS).
+
+%% Empty only when every sub-BQS is empty.
+is_empty(#state{bq = BQ, bqss = BQSs}) ->
+ all0(fun (_P, BQSN) -> BQ:is_empty(BQSN) end, BQSs);
+is_empty(#passthrough{bq = BQ, bqs = BQS}) ->
+ BQ:is_empty(BQS).
+
+%% Depth: sum of the sub-BQS depths.
+depth(#state{bq = BQ, bqss = BQSs}) ->
+ add0(fun (_P, BQSN) -> BQ:depth(BQSN) end, BQSs);
+depth(#passthrough{bq = BQ, bqs = BQS}) ->
+ BQ:depth(BQS).
+
+%% Apply the same RAM-duration target to every sub-BQS.
+set_ram_duration_target(DurationTarget, State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) ->
+ BQ:set_ram_duration_target(DurationTarget, BQSN)
+ end, State);
+set_ram_duration_target(DurationTarget,
+ State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(set_ram_duration_target(DurationTarget, BQS)).
+
+%% Report a single duration across sub-BQSs (combined by fold_min2,
+%% defined later in this module).
+ram_duration(State = #state{bq = BQ}) ->
+ fold_min2(fun (_P, BQSN) -> BQ:ram_duration(BQSN) end, State);
+ram_duration(State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(ram_duration(BQS)).
+
+%% Combine per-sub-BQS answers with precedence timed > idle > false:
+%% once any sub-BQS says 'timed' the answer is 'timed'; 'idle' is
+%% only downgraded back to itself.
+needs_timeout(#state{bq = BQ, bqss = BQSs}) ->
+ fold0(fun (_P, _BQSN, timed) -> timed;
+ (_P, BQSN, idle) -> case BQ:needs_timeout(BQSN) of
+ timed -> timed;
+ _ -> idle
+ end;
+ (_P, BQSN, false) -> BQ:needs_timeout(BQSN)
+ end, false, BQSs);
+needs_timeout(#passthrough{bq = BQ, bqs = BQS}) ->
+ BQ:needs_timeout(BQS).
+
+%% Deliver the timeout to every sub-BQS.
+timeout(State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) -> BQ:timeout(BQSN) end, State);
+timeout(State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(timeout(BQS)).
+
+handle_pre_hibernate(State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) ->
+ BQ:handle_pre_hibernate(BQSN)
+ end, State);
+handle_pre_hibernate(State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(handle_pre_hibernate(BQS)).
+
+handle_info(Msg, State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) -> BQ:handle_info(Msg, BQSN) end, State);
+handle_info(Msg, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(handle_info(Msg, BQS)).
+
+resume(State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) -> BQ:resume(BQSN) end, State);
+resume(State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(resume(BQS)).
+
+%% Aggregate ingress/egress rates: sum the {In, Out} pairs of all
+%% sub-queues.
+msg_rates(#state{bq = BQ, bqss = BQSs}) ->
+ fold0(fun(_P, BQSN, {InN, OutN}) ->
+ {In, Out} = BQ:msg_rates(BQSN),
+ {InN + In, OutN + Out}
+ end, {0.0, 0.0}, BQSs);
+msg_rates(#passthrough{bq = BQ, bqs = BQS}) ->
+ BQ:msg_rates(BQS).
+
+%% backing_queue_status: merge per-priority proplists via
+%% combine_status/3, which also accumulates a priority_lengths entry.
+info(backing_queue_status, #state{bq = BQ, bqss = BQSs}) ->
+ fold0(fun (P, BQSN, Acc) ->
+ combine_status(P, BQ:info(backing_queue_status, BQSN), Acc)
+ end, nothing, BQSs);
+%% head_message_timestamp: see find_head_message_timestamp/3; '' is the
+%% "no timestamp found" default.
+info(head_message_timestamp, #state{bq = BQ, bqss = BQSs}) ->
+ find_head_message_timestamp(BQ, BQSs, '');
+%% Any other item is assumed numeric and summed across sub-queues.
+info(Item, #state{bq = BQ, bqss = BQSs}) ->
+ fold0(fun (_P, BQSN, Acc) ->
+ Acc + BQ:info(Item, BQSN)
+ end, 0, BQSs);
+info(Item, #passthrough{bq = BQ, bqs = BQS}) ->
+ BQ:info(Item, BQS).
+
+%% Invoke Fun on one sub-queue: a {Priority, Fun} tuple targets that
+%% priority, a bare Fun targets the highest (max) priority.
+invoke(Mod, {P, Fun}, State = #state{bq = BQ}) ->
+ pick1(fun (_P, BQSN) -> BQ:invoke(Mod, Fun, BQSN) end, P, State);
+invoke(Mod, Fun, State = #state{bq = BQ, max_priority = P}) ->
+ pick1(fun (_P, BQSN) -> BQ:invoke(Mod, Fun, BQSN) end, P, State);
+invoke(Mod, Fun, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(invoke(Mod, Fun, BQS)).
+
+%% Duplicate check is delegated to the sub-queue the message's priority
+%% maps to.
+is_duplicate(Msg, State = #state{bq = BQ}) ->
+ pick2(fun (_P, BQSN) -> BQ:is_duplicate(Msg, BQSN) end, Msg, State);
+is_duplicate(Msg, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough2(is_duplicate(Msg, BQS)).
+
+%% Propagate a queue-mode (default/lazy) change to all sub-queues.
+set_queue_mode(Mode, State = #state{bq = BQ}) ->
+ foreach1(fun (_P, BQSN) -> BQ:set_queue_mode(Mode, BQSN) end, State);
+set_queue_mode(Mode, State = #passthrough{bq = BQ, bqs = BQS}) ->
+ ?passthrough1(set_queue_mode(Mode, BQS)).
+
+%% Pair up published messages with their ack tags. AckTags arrives as a
+%% list of per-priority batches; each batch's priority (taken from its
+%% first tag) selects the matching message partition.
+zip_msgs_and_acks(Msgs, AckTags, Accumulator, #state{bqss = [{MaxP, _} |_]}) ->
+ MsgsByPriority = partition_publish_delivered_batch(Msgs, MaxP),
+ lists:foldl(fun (Acks, MAs) ->
+ {P, _AckTag} = hd(Acks),
+ Pubs = maps:get(P, MsgsByPriority),
+ MAs0 = zip_msgs_and_acks(Pubs, Acks),
+ MAs ++ MAs0
+ end, Accumulator, AckTags);
+zip_msgs_and_acks(Msgs, AckTags, Accumulator,
+ #passthrough{bq = BQ, bqs = BQS}) ->
+ BQ:zip_msgs_and_acks(Msgs, AckTags, Accumulator, BQS).
+
+%%----------------------------------------------------------------------------
+
+%% The real backing-queue module this one wraps, taken from application
+%% config; crashes (badmatch) if the env key is unset, which is
+%% intentional — the wrapper is unusable without it.
+bq() ->
+ {ok, RealBQ} = application:get_env(
+ rabbitmq_priority_queue, backing_queue_module),
+ RealBQ.
+
+%% Note on suffixes: Many utility functions here have suffixes telling
+%% you the arity of the return type of the BQ function they are
+%% designed to work with.
+%%
+%% 0 - BQ function returns a value and does not modify state
+%% 1 - BQ function just returns a new state
+%% 2 - BQ function returns a 2-tuple of {Result, NewState}
+%% 3 - BQ function returns a 3-tuple of {Result1, Result2, NewState}
+
+%% Fold over results without touching state: run Fun(P, BQS, Acc) over
+%% the priority/sub-state pairs in order.
+fold0(Fun, Acc, [{P, BQSN} | Rest]) -> fold0(Fun, Fun(P, BQSN, Acc), Rest);
+fold0(_Fun, Acc, []) -> Acc.
+
+%% Do all BQs match the predicate? Short-circuits to false once any
+%% sub-queue fails.
+all0(Pred, BQSs) -> fold0(fun (_P, _BQSN, false) -> false;
+ (P, BQSN, true) -> Pred(P, BQSN)
+ end, true, BQSs).
+
+%% Sum the numeric results of Fun over all sub-queues.
+add0(Fun, BQSs) -> fold0(fun (P, BQSN, Acc) -> Acc + Fun(P, BQSN) end, 0, BQSs).
+
+%% Apply a state-transforming Fun to every sub-queue, preserving
+%% priority order, and re-assert the ordering invariant via a/1.
+foreach1(Fun, State = #state{bqss = BQSs}) ->
+ a(State#state{bqss = foreach1(Fun, BQSs, [])}).
+foreach1(Fun, [{Priority, BQSN} | Rest], BQSAcc) ->
+ BQSN1 = Fun(Priority, BQSN),
+ foreach1(Fun, Rest, [{Priority, BQSN1} | BQSAcc]);
+foreach1(_Fun, [], BQSAcc) ->
+ lists:reverse(BQSAcc).
+
+%% For a given prioritisable thing, apply Fun only to the sub-queue its
+%% priority maps to and store the updated sub-state back.
+pick1(Fun, Prioritisable, #state{bqss = BQSs} = State) ->
+ {P, BQSN} = priority_bq(Prioritisable, BQSs),
+ a(State#state{bqss = bq_store(P, Fun(P, BQSN), BQSs)}).
+
+%% Fold over results while threading state: Fun(P, BQS, Acc) returns
+%% {Acc1, BQS1}; all updated sub-states are written back.
+fold2(Fun, Acc, State = #state{bqss = BQSs}) ->
+ {Res, BQSs1} = fold2(Fun, Acc, BQSs, []),
+ {Res, a(State#state{bqss = BQSs1})}.
+
+fold2(Fun, Acc, [{P, BQSN} | Rest], BQSAcc) ->
+ {Acc1, BQSN1} = Fun(P, BQSN, Acc),
+ fold2(Fun, Acc1, Rest, [{P, BQSN1} | BQSAcc]);
+fold2(_Fun, Acc, [], BQSAcc) ->
+ {Acc, lists:reverse(BQSAcc)}.
+
+%% Fold over results assuming results are lists and we want to append
+%% them. Note each sub-result is prepended (Res ++ Acc), so lower
+%% priorities end up earlier in the accumulator.
+fold_append2(Fun, State) ->
+ fold2(fun (P, BQSN, Acc) ->
+ {Res, BQSN1} = Fun(P, BQSN),
+ {Res ++ Acc, BQSN1}
+ end, [], State).
+
+%% Fold over results assuming results are numbers and we want to sum
+%% them; 'infinity' is handled as an absorbing value.
+fold_add2(Fun, State) ->
+ fold2(fun (P, BQSN, Acc) ->
+ {Res, BQSN1} = Fun(P, BQSN),
+ {add_maybe_infinity(Res, Acc), BQSN1}
+ end, 0, State).
+
+%% Fold over results assuming results are numbers and we want the
+%% minimum; starts from 'infinity' so any numeric result wins.
+fold_min2(Fun, State) ->
+ fold2(fun (P, BQSN, Acc) ->
+ {Res, BQSN1} = Fun(P, BQSN),
+ {erlang:min(Res, Acc), BQSN1}
+ end, infinity, State).
+
+%% Fold over results assuming results are lists and we want to append
+%% them, and also that we have some AckTags we want to pass in to each
+%% invocation. Sub-queues with no tags at their priority are skipped.
+fold_by_acktags2(Fun, AckTags, State) ->
+ AckTagsByPriority = partition_acktags(AckTags),
+ fold_append2(fun (P, BQSN) ->
+ case maps:find(P, AckTagsByPriority) of
+ {ok, AckTagsN} -> Fun(AckTagsN, BQSN);
+ error -> {[], BQSN}
+ end
+ end, State).
+
+%% For a given prioritisable thing, run Fun on its sub-queue only and
+%% return {Result, UpdatedState}.
+pick2(Fun, Prioritisable, #state{bqss = BQSs} = State) ->
+ {P, BQSN} = priority_bq(Prioritisable, BQSs),
+ {Res, BQSN1} = Fun(P, BQSN),
+ {Res, a(State#state{bqss = bq_store(P, BQSN1, BQSs)})}.
+
+%% Run through BQs in priority order until one does not return
+%% {NotFound, NewState} or we have gone through them all. The updated
+%% sub-states of all visited queues are kept either way.
+find2(Fun, NotFound, State = #state{bqss = BQSs}) ->
+ {Res, BQSs1} = find2(Fun, NotFound, BQSs, []),
+ {Res, a(State#state{bqss = BQSs1})}.
+find2(Fun, NotFound, [{P, BQSN} | Rest], BQSAcc) ->
+ case Fun(P, BQSN) of
+ {NotFound, BQSN1} -> find2(Fun, NotFound, Rest, [{P, BQSN1} | BQSAcc]);
+ {Res, BQSN1} -> {Res, lists:reverse([{P, BQSN1} | BQSAcc]) ++ Rest}
+ end;
+find2(_Fun, NotFound, [], BQSAcc) ->
+ {NotFound, lists:reverse(BQSAcc)}.
+
+%% Run through BQs in priority order like find2 but also folding as we
+%% go: Fun returns {Result, Acc1, BQS1} and the accumulator is threaded
+%% across sub-queues until a non-NotFound result stops the walk.
+findfold3(Fun, Acc, NotFound, State = #state{bqss = BQSs}) ->
+ {Res, Acc1, BQSs1} = findfold3(Fun, Acc, NotFound, BQSs, []),
+ {Res, Acc1, a(State#state{bqss = BQSs1})}.
+findfold3(Fun, Acc, NotFound, [{P, BQSN} | Rest], BQSAcc) ->
+ case Fun(P, BQSN, Acc) of
+ {NotFound, Acc1, BQSN1} ->
+ findfold3(Fun, Acc1, NotFound, Rest, [{P, BQSN1} | BQSAcc]);
+ {Res, Acc1, BQSN1} ->
+ {Res, Acc1, lists:reverse([{P, BQSN1} | BQSAcc]) ++ Rest}
+ end;
+findfold3(_Fun, Acc, NotFound, [], BQSAcc) ->
+ {NotFound, Acc, lists:reverse(BQSAcc)}.
+
+%% Look up the sub-state for priority P; a missing priority is a
+%% programming error, hence the deliberate exit.
+bq_fetch(P, []) -> exit({not_found, P});
+bq_fetch(P, [{P, BQSN} | _]) -> {P, BQSN};
+bq_fetch(P, [{_, _BQSN} | T]) -> bq_fetch(P, T).
+
+%% Replace the sub-state stored under priority P, leaving the others
+%% (and the list order) untouched.
+bq_store(P, BQS, BQSs) ->
+ [{PN, case PN of
+ P -> BQS;
+ _ -> BQSN
+ end} || {PN, BQSN} <- BQSs].
+
+%% Assertion: the bqss list must be in strictly descending priority
+%% order (highest first, no duplicates); exits otherwise.
+a(State = #state{bqss = BQSs}) ->
+ Ps = [P || {P, _} <- BQSs],
+ case lists:reverse(lists:usort(Ps)) of
+ Ps -> State;
+ _ -> exit({bad_order, Ps})
+ end.
+
+%%----------------------------------------------------------------------------
+%% Group a publish batch ({Msg, MsgProps, IsDelivered} triples) by the
+%% priority of each message.
+partition_publish_batch(Publishes, MaxP) ->
+ partition_publishes(
+ Publishes, fun ({Msg, _, _}) -> Msg end, MaxP).
+
+%% Same, for publish_delivered batches ({Msg, MsgProps} pairs).
+partition_publish_delivered_batch(Publishes, MaxP) ->
+ partition_publishes(
+ Publishes, fun ({Msg, _}) -> Msg end, MaxP).
+
+%% Partition publishes into a map of priority => publishes, preserving
+%% the original order within each priority (cons + final reverse).
+partition_publishes(Publishes, ExtractMsg, MaxP) ->
+ Partitioned =
+ lists:foldl(fun (Pub, Dict) ->
+ Msg = ExtractMsg(Pub),
+ rabbit_misc:maps_cons(priority(Msg, MaxP), Pub, Dict)
+ end, maps:new(), Publishes),
+ maps:map(fun (_P, RevPubs) ->
+ lists:reverse(RevPubs)
+ end, Partitioned).
+
+%% Resolve a prioritisable term to its {Priority, SubState} pair; the
+%% head of bqss carries the queue's maximum priority.
+priority_bq(Priority, [{MaxP, _} | _] = BQSs) ->
+ bq_fetch(priority(Priority, MaxP), BQSs).
+
+%% Messages with a priority which is higher than the queue's maximum are treated
+%% as if they were published with the maximum priority.
+%% An absent priority property maps to 0 (the lowest priority);
+%% messages/contents are unwrapped recursively down to the integer.
+priority(undefined, _MaxP) ->
+ 0;
+priority(Priority, MaxP) when is_integer(Priority), Priority =< MaxP ->
+ Priority;
+priority(Priority, MaxP) when is_integer(Priority), Priority > MaxP ->
+ MaxP;
+priority(#basic_message{content = Content}, MaxP) ->
+ priority(rabbit_binary_parser:ensure_content_decoded(Content), MaxP);
+priority(#content{properties = Props}, MaxP) ->
+ #'P_basic'{priority = Priority0} = Props,
+ priority(Priority0, MaxP).
+
+%% Add two counts where either operand may be the atom 'infinity';
+%% 'infinity' is absorbing, otherwise this is plain numeric addition.
+add_maybe_infinity(A, B) when A =:= infinity orelse B =:= infinity ->
+    infinity;
+add_maybe_infinity(A, B) ->
+    A + B.
+
+%% Group {Priority, AckTag} tags into a map of priority => tags,
+%% preserving the original tag order within each priority.
+partition_acktags(AckTags) -> partition_acktags(AckTags, maps:new()).
+
+partition_acktags([], Partitioned) ->
+ maps:map(fun (_P, RevAckTags) ->
+ lists:reverse(RevAckTags)
+ end, Partitioned);
+partition_acktags([{P, AckTag} | Rest], Partitioned) ->
+ partition_acktags(Rest, rabbit_misc:maps_cons(P, AckTag, Partitioned)).
+
+%% Re-attach priority P to bare (integer) ack tags returned by the
+%% inner BQ; tags that are already wrapped are left alone.
+priority_on_acktags(P, AckTags) ->
+ [case Tag of
+ _ when is_integer(Tag) -> {P, Tag};
+ _ -> Tag
+ end || Tag <- AckTags].
+
+%% Merge one sub-queue's backing_queue_status proplist into the
+%% combined status, accumulating a per-priority length list under
+%% 'priority_lengths'. 'nothing' marks the first sub-queue.
+combine_status(P, New, nothing) ->
+ [{priority_lengths, [{P, proplists:get_value(len, New)}]} | New];
+combine_status(P, New, Old) ->
+ Combined = [{K, cse(V, proplists:get_value(K, Old))} || {K, V} <- New],
+ Lens = [{P, proplists:get_value(len, New)} |
+ proplists:get_value(priority_lengths, Old)],
+ [{priority_lengths, Lens} | Combined].
+
+%% Combine two status entries ("combine status entry"): 'infinity' and
+%% queue-mode atoms are absorbing, numbers are summed, anything else
+%% cannot be meaningfully merged.
+cse(infinity, _) -> infinity;
+cse(_, infinity) -> infinity;
+%% queue modes
+cse(_, default) -> default;
+cse(default, _) -> default;
+cse(_, lazy) -> lazy;
+cse(lazy, _) -> lazy;
+%% numerical stats
+cse(A, B) when is_number(A) -> A + B;
+cse({delta, _, _, _, _}, _) -> {delta, todo, todo, todo, todo};
+cse(_, _) -> undefined.
+
+%% When asked about 'head_message_timestamp' for this priority queue, we
+%% walk all the backing queues, starting by the highest priority. Once a
+%% backing queue having messages (ready or unacknowledged) is found, its
+%% 'head_message_timestamp' is returned even if it is null.
+
+%% Walk sub-queues (highest priority first); the first one holding any
+%% message — ready or unacked-in-RAM — supplies the answer, even if its
+%% timestamp is the empty-atom default passed in by the caller.
+find_head_message_timestamp(BQ, [{_, BQSN} | Rest], Timestamp) ->
+ MsgCount = BQ:len(BQSN) + BQ:info(messages_unacknowledged_ram, BQSN),
+ if
+ MsgCount =/= 0 -> BQ:info(head_message_timestamp, BQSN);
+ true -> find_head_message_timestamp(BQ, Rest, Timestamp)
+ end;
+find_head_message_timestamp(_, [], Timestamp) ->
+ Timestamp.
+
+%% Pairwise-zip published messages with their ack tags into
+%% {MsgId, AckTag} pairs; both lists must be the same length
+%% (lists:zipwith/3 errors otherwise).
+zip_msgs_and_acks(Pubs, AckTags) ->
+ lists:zipwith(
+ fun ({#basic_message{ id = Id }, _Props}, AckTag) ->
+ {Id, AckTag}
+ end, Pubs, AckTags).
diff --git a/deps/rabbit/src/rabbit_queue_consumers.erl b/deps/rabbit/src/rabbit_queue_consumers.erl
new file mode 100644
index 0000000000..4f826f72e8
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_consumers.erl
@@ -0,0 +1,568 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_consumers).
+
+-export([new/0, max_active_priority/1, inactive/1, all/1, all/3, count/0,
+ unacknowledged_message_count/0, add/10, remove/3, erase_ch/2,
+ send_drained/0, deliver/5, record_ack/3, subtract_acks/3,
+ possibly_unblock/3,
+ resume_fun/0, notify_sent_fun/1, activate_limit_fun/0,
+ credit/6, utilisation/1, is_same/3, get_consumer/1, get/3,
+ consumer_tag/1, get_infos/1]).
+
+%%----------------------------------------------------------------------------
+
+-define(QUEUE, lqueue).
+
+-define(UNSENT_MESSAGE_LIMIT, 200).
+
+%% Utilisation average calculations are all in μs.
+-define(USE_AVG_HALF_LIFE, 1000000.0).
+
+-record(state, {consumers, use}).
+
+-record(consumer, {tag, ack_required, prefetch, args, user}).
+
+%% These are held in our process dictionary
+-record(cr, {ch_pid,
+ monitor_ref,
+ acktags,
+ consumer_count,
+ %% Queue of {ChPid, #consumer{}} for consumers which have
+ %% been blocked (rate/prefetch limited) for any reason
+ blocked_consumers,
+ %% The limiter itself
+ limiter,
+ %% Internal flow control for queue -> writer
+ unsent_message_count}).
+
+%%----------------------------------------------------------------------------
+
+-type time_micros() :: non_neg_integer().
+-type ratio() :: float().
+-type state() :: #state{consumers ::priority_queue:q(),
+ use :: {'inactive',
+ time_micros(), time_micros(), ratio()} |
+ {'active', time_micros(), ratio()}}.
+-type consumer() :: #consumer{tag::rabbit_types:ctag(), ack_required::boolean(),
+ prefetch::non_neg_integer(), args::rabbit_framing:amqp_table(),
+ user::rabbit_types:username()}.
+-type ch() :: pid().
+-type ack() :: non_neg_integer().
+-type cr_fun() :: fun ((#cr{}) -> #cr{}).
+-type fetch_result() :: {rabbit_types:basic_message(), boolean(), ack()}.
+
+%%----------------------------------------------------------------------------
+
+-spec new() -> state().
+
+%% Fresh consumer state: no consumers, marked 'active' as of now for
+%% the utilisation moving-average bookkeeping.
+new() -> #state{consumers = priority_queue:new(),
+ use = {active,
+ erlang:monotonic_time(micro_seconds),
+ 1.0}}.
+
+-spec max_active_priority(state()) -> integer() | 'infinity' | 'empty'.
+
+%% Highest priority among currently unblocked consumers ('empty' if
+%% there are none).
+max_active_priority(#state{consumers = Consumers}) ->
+ priority_queue:highest(Consumers).
+
+-spec inactive(state()) -> boolean().
+
+%% True when no unblocked consumer is available to deliver to.
+inactive(#state{consumers = Consumers}) ->
+ priority_queue:is_empty(Consumers).
+
+-spec all(state()) -> [{ch(), rabbit_types:ctag(), boolean(),
+ non_neg_integer(), boolean(), atom(),
+ rabbit_framing:amqp_table(), rabbit_types:username()}].
+
+%% List every consumer (active and blocked) with default activity
+%% status, i.e. as if single-active-consumer were off.
+all(State) ->
+ all(State, none, false).
+
+%% List every consumer, both the unblocked ones in the state and the
+%% per-channel blocked ones held in the process dictionary. When
+%% single-active-consumer is on, only the given SingleActiveConsumer is
+%% reported as {true, single_active}; all others are {false, waiting}.
+all(#state{consumers = Consumers}, SingleActiveConsumer, SingleActiveConsumerOn) ->
+ lists:foldl(fun (C, Acc) -> consumers(C#cr.blocked_consumers, SingleActiveConsumer, SingleActiveConsumerOn, Acc) end,
+ consumers(Consumers, SingleActiveConsumer, SingleActiveConsumerOn, []), all_ch_record()).
+
+%% Fold a priority queue of {ChPid, #consumer{}} entries into the
+%% accumulator as info tuples, annotating each with {Active, Status}.
+consumers(Consumers, SingleActiveConsumer, SingleActiveConsumerOn, Acc) ->
+ ActiveActivityStatusFun = case SingleActiveConsumerOn of
+ true ->
+ fun({ChPid, Consumer}) ->
+ case SingleActiveConsumer of
+ {ChPid, Consumer} ->
+ {true, single_active};
+ _ ->
+ {false, waiting}
+ end
+ end;
+ false ->
+ fun(_) -> {true, up} end
+ end,
+ priority_queue:fold(
+ fun ({ChPid, Consumer}, _P, Acc1) ->
+ #consumer{tag = CTag, ack_required = Ack, prefetch = Prefetch,
+ args = Args, user = Username} = Consumer,
+ {Active, ActivityStatus} = ActiveActivityStatusFun({ChPid, Consumer}),
+ [{ChPid, CTag, Ack, Prefetch, Active, ActivityStatus, Args, Username} | Acc1]
+ end, Acc, Consumers).
+
+-spec count() -> non_neg_integer().
+
+%% Total consumer count across all channel records (the #cr{} records
+%% live in this process's dictionary; see ch_record/2).
+count() -> lists:sum([Count || #cr{consumer_count = Count} <- all_ch_record()]).
+
+-spec unacknowledged_message_count() -> non_neg_integer().
+
+%% Total number of unacked deliveries across all channels.
+unacknowledged_message_count() ->
+ lists:sum([?QUEUE:len(C#cr.acktags) || C <- all_ch_record()]).
+
+-spec add(ch(), rabbit_types:ctag(), boolean(), pid(), boolean(),
+ non_neg_integer(), rabbit_framing:amqp_table(), boolean(),
+ rabbit_types:username(), state())
+ -> state().
+
+%% Register a new consumer on channel ChPid. Bumps the channel's
+%% consumer count, optionally activates its limiter, applies any
+%% x-credit arguments (issuing an initial credit/drain when present),
+%% and enqueues the consumer at its x-priority. Marks utilisation
+%% 'active' since a consumer just arrived.
+add(ChPid, CTag, NoAck, LimiterPid, LimiterActive, Prefetch, Args, IsEmpty,
+ Username, State = #state{consumers = Consumers,
+ use = CUInfo}) ->
+ C = #cr{consumer_count = Count,
+ limiter = Limiter} = ch_record(ChPid, LimiterPid),
+ Limiter1 = case LimiterActive of
+ true -> rabbit_limiter:activate(Limiter);
+ false -> Limiter
+ end,
+ C1 = C#cr{consumer_count = Count + 1, limiter = Limiter1},
+ update_ch_record(
+ %% {0, auto} means "no prefetch"; an auto-ack consumer needs no
+ %% credit tracking either, so both skip credit_and_drain.
+ case parse_credit_args(Prefetch, Args) of
+ {0, auto} -> C1;
+ {_Credit, auto} when NoAck -> C1;
+ {Credit, Mode} -> credit_and_drain(
+ C1, CTag, Credit, Mode, IsEmpty)
+ end),
+ Consumer = #consumer{tag = CTag,
+ ack_required = not NoAck,
+ prefetch = Prefetch,
+ args = Args,
+ user = Username},
+ State#state{consumers = add_consumer({ChPid, Consumer}, Consumers),
+ use = update_use(CUInfo, active)}.
+
+-spec remove(ch(), rabbit_types:ctag(), state()) ->
+ 'not_found' | state().
+
+%% Cancel one consumer. Removes it from both the blocked set and the
+%% active queue, deactivates the channel's limiter when this was its
+%% last consumer, and drops the tag's credit bookkeeping.
+remove(ChPid, CTag, State = #state{consumers = Consumers}) ->
+ case lookup_ch(ChPid) of
+ not_found ->
+ not_found;
+ C = #cr{consumer_count = Count,
+ limiter = Limiter,
+ blocked_consumers = Blocked} ->
+ Blocked1 = remove_consumer(ChPid, CTag, Blocked),
+ Limiter1 = case Count of
+ 1 -> rabbit_limiter:deactivate(Limiter);
+ _ -> Limiter
+ end,
+ Limiter2 = rabbit_limiter:forget_consumer(Limiter1, CTag),
+ update_ch_record(C#cr{consumer_count = Count - 1,
+ limiter = Limiter2,
+ blocked_consumers = Blocked1}),
+ State#state{consumers =
+ remove_consumer(ChPid, CTag, Consumers)}
+ end.
+
+-spec erase_ch(ch(), state()) ->
+ 'not_found' | {[ack()], [rabbit_types:ctag()],
+ state()}.
+
+%% Forget a whole channel (e.g. on channel death): erase its #cr{}
+%% record and return its outstanding ack tags plus the tags of all its
+%% consumers (active and blocked) so the caller can requeue/notify.
+erase_ch(ChPid, State = #state{consumers = Consumers}) ->
+ case lookup_ch(ChPid) of
+ not_found ->
+ not_found;
+ C = #cr{ch_pid = ChPid,
+ acktags = ChAckTags,
+ blocked_consumers = BlockedQ} ->
+ All = priority_queue:join(Consumers, BlockedQ),
+ ok = erase_ch_record(C),
+ Filtered = priority_queue:filter(chan_pred(ChPid, true), All),
+ {[AckTag || {AckTag, _CTag} <- ?QUEUE:to_list(ChAckTags)],
+ tags(priority_queue:to_list(Filtered)),
+ State#state{consumers = remove_consumers(ChPid, Consumers)}}
+ end.
+
+-spec send_drained() -> 'ok'.
+
+%% Issue basic.credit drain notifications for every channel that has
+%% drained credit outstanding (see send_drained/1).
+send_drained() -> [update_ch_record(send_drained(C)) || C <- all_ch_record()],
+ ok.
+
+-spec deliver(fun ((boolean()) -> {fetch_result(), T}),
+ rabbit_amqqueue:name(), state(), boolean(),
+ none | {ch(), rabbit_types:ctag()} | {ch(), consumer()}) ->
+ {'delivered', boolean(), T, state()} |
+ {'undelivered', boolean(), state()}.
+
+%% Try to deliver one message fetched via FetchFun. The boolean in the
+%% result tuples reports whether the consumer queue changed.
+deliver(FetchFun, QName, State, SingleActiveConsumerIsOn, ActiveConsumer) ->
+ deliver(FetchFun, QName, false, State, SingleActiveConsumerIsOn, ActiveConsumer).
+
+%% SAC mode with no elected consumer: nothing to deliver to.
+deliver(_FetchFun, _QName, false, State, true, none) ->
+ {undelivered, false,
+ State#state{use = update_use(State#state.use, inactive)}};
+%% SAC mode: only the single active consumer may receive.
+deliver(FetchFun, QName, false, State = #state{consumers = Consumers}, true, SingleActiveConsumer) ->
+ {ChPid, Consumer} = SingleActiveConsumer,
+ %% blocked (rate/prefetch limited) consumers are removed from the queue state, but not the exclusive_consumer field,
+ %% so we need to do this check to avoid adding the exclusive consumer to the channel record
+ %% over and over
+ case is_blocked(SingleActiveConsumer) of
+ true ->
+ {undelivered, false,
+ State#state{use = update_use(State#state.use, inactive)}};
+ false ->
+ case deliver_to_consumer(FetchFun, SingleActiveConsumer, QName) of
+ {delivered, R} ->
+ {delivered, false, R, State};
+ undelivered ->
+ {ChPid, Consumer} = SingleActiveConsumer,
+ Consumers1 = remove_consumer(ChPid, Consumer#consumer.tag, Consumers),
+ {undelivered, true,
+ State#state{consumers = Consumers1, use = update_use(State#state.use, inactive)}}
+ end
+ end;
+%% Normal mode: round-robin within priority — pop the head consumer,
+%% and on success re-enqueue it at the same priority (to the back).
+%% On failure (consumer became blocked) recurse without it.
+deliver(FetchFun, QName, ConsumersChanged,
+ State = #state{consumers = Consumers}, false, _SingleActiveConsumer) ->
+ case priority_queue:out_p(Consumers) of
+ {empty, _} ->
+ {undelivered, ConsumersChanged,
+ State#state{use = update_use(State#state.use, inactive)}};
+ {{value, QEntry, Priority}, Tail} ->
+ case deliver_to_consumer(FetchFun, QEntry, QName) of
+ {delivered, R} ->
+ {delivered, ConsumersChanged, R,
+ State#state{consumers = priority_queue:in(QEntry, Priority,
+ Tail)}};
+ undelivered ->
+ deliver(FetchFun, QName, true,
+ State#state{consumers = Tail}, false, _SingleActiveConsumer)
+ end
+ end.
+
+%% Attempt delivery to one consumer entry. If the channel is blocked
+%% (flow-control limit or suspended limiter) or the limiter refuses,
+%% park the entry in the channel's blocked_consumers and report
+%% 'undelivered'; otherwise fetch and send.
+deliver_to_consumer(FetchFun, E = {ChPid, Consumer}, QName) ->
+ C = lookup_ch(ChPid),
+ case is_ch_blocked(C) of
+ true ->
+ block_consumer(C, E),
+ undelivered;
+ false -> case rabbit_limiter:can_send(C#cr.limiter,
+ Consumer#consumer.ack_required,
+ Consumer#consumer.tag) of
+ {suspend, Limiter} ->
+ block_consumer(C#cr{limiter = Limiter}, E),
+ undelivered;
+ {continue, Limiter} ->
+ {delivered, deliver_to_consumer(
+ FetchFun, Consumer,
+ C#cr{limiter = Limiter}, QName)}
+ end
+ end.
+
+%% Actually fetch the message and push it to the channel. Records the
+%% ack tag when the consumer acks, and bumps the unsent-message count
+%% used for queue->writer internal flow control.
+deliver_to_consumer(FetchFun,
+ #consumer{tag = CTag,
+ ack_required = AckRequired},
+ C = #cr{ch_pid = ChPid,
+ acktags = ChAckTags,
+ unsent_message_count = Count},
+ QName) ->
+ {{Message, IsDelivered, AckTag}, R} = FetchFun(AckRequired),
+ rabbit_channel:deliver(ChPid, CTag, AckRequired,
+ {QName, self(), AckTag, IsDelivered, Message}),
+ ChAckTags1 = case AckRequired of
+ true -> ?QUEUE:in({AckTag, CTag}, ChAckTags);
+ false -> ChAckTags
+ end,
+ update_ch_record(C#cr{acktags = ChAckTags1,
+ unsent_message_count = Count + 1}),
+ R.
+
+%% Is this consumer currently parked in its channel's blocked set?
+%% NOTE(review): crashes (function_clause) if the channel record is
+%% gone — callers appear to only use it for known-live channels.
+is_blocked(Consumer = {ChPid, _C}) ->
+ #cr{blocked_consumers = BlockedConsumers} = lookup_ch(ChPid),
+ priority_queue:member(Consumer, BlockedConsumers).
+
+-spec record_ack(ch(), pid(), ack()) -> 'ok'.
+
+%% Remember an outstanding ack tag for a channel (tag carries no
+%% consumer tag, hence 'none').
+record_ack(ChPid, LimiterPid, AckTag) ->
+ C = #cr{acktags = ChAckTags} = ch_record(ChPid, LimiterPid),
+ update_ch_record(C#cr{acktags = ?QUEUE:in({AckTag, none}, ChAckTags)}),
+ ok.
+
+-spec subtract_acks(ch(), [ack()], state()) ->
+ 'not_found' | 'unchanged' | {'unblocked', state()}.
+
+%% Settle acks from a channel: drop the tags from the channel's ack
+%% queue, return the per-consumer-tag counts to the limiter, and if
+%% that freed credit, unblock the channel's blocked consumers.
+subtract_acks(ChPid, AckTags, State) ->
+ case lookup_ch(ChPid) of
+ not_found ->
+ not_found;
+ C = #cr{acktags = ChAckTags, limiter = Lim} ->
+ {CTagCounts, AckTags2} = subtract_acks(
+ AckTags, [], maps:new(), ChAckTags),
+ {Unblocked, Lim2} =
+ maps:fold(
+ fun (CTag, Count, {UnblockedN, LimN}) ->
+ {Unblocked1, LimN1} =
+ rabbit_limiter:ack_from_queue(LimN, CTag, Count),
+ {UnblockedN orelse Unblocked1, LimN1}
+ end, {false, Lim}, CTagCounts),
+ C2 = C#cr{acktags = AckTags2, limiter = Lim2},
+ case Unblocked of
+ true -> unblock(C2, State);
+ false -> update_ch_record(C2),
+ unchanged
+ end
+ end.
+
+%% Walk the ack queue removing the given tags (assumed in queue order);
+%% entries not being acked are kept in Prefix and re-joined at the
+%% front. Returns {#{CTag => AckedCount}, RemainingAckQueue}.
+subtract_acks([], [], CTagCounts, AckQ) ->
+ {CTagCounts, AckQ};
+subtract_acks([], Prefix, CTagCounts, AckQ) ->
+ {CTagCounts, ?QUEUE:join(?QUEUE:from_list(lists:reverse(Prefix)), AckQ)};
+subtract_acks([T | TL] = AckTags, Prefix, CTagCounts, AckQ) ->
+ case ?QUEUE:out(AckQ) of
+ {{value, {T, CTag}}, QTail} ->
+ subtract_acks(TL, Prefix,
+ maps:update_with(CTag, fun (Old) -> Old + 1 end, 1, CTagCounts), QTail);
+ {{value, V}, QTail} ->
+ subtract_acks(AckTags, [V | Prefix], CTagCounts, QTail);
+ {empty, _} ->
+ subtract_acks([], Prefix, CTagCounts, AckQ)
+ end.
+
+-spec possibly_unblock(cr_fun(), ch(), state()) ->
+ 'unchanged' | {'unblocked', state()}.
+
+%% Apply Update to a channel record; if the update flipped the channel
+%% from blocked to unblocked, move its eligible blocked consumers back
+%% into the active set.
+possibly_unblock(Update, ChPid, State) ->
+ case lookup_ch(ChPid) of
+ not_found -> unchanged;
+ C -> C1 = Update(C),
+ case is_ch_blocked(C) andalso not is_ch_blocked(C1) of
+ false -> update_ch_record(C1),
+ unchanged;
+ true -> unblock(C1, State)
+ end
+ end.
+
+%% Split the channel's blocked consumers: those still blocked by their
+%% per-consumer credit stay blocked; the rest rejoin the active
+%% priority queue and utilisation flips to 'active'.
+unblock(C = #cr{blocked_consumers = BlockedQ, limiter = Limiter},
+ State = #state{consumers = Consumers, use = Use}) ->
+ case lists:partition(
+ fun({_P, {_ChPid, #consumer{tag = CTag}}}) ->
+ rabbit_limiter:is_consumer_blocked(Limiter, CTag)
+ end, priority_queue:to_list(BlockedQ)) of
+ {_, []} ->
+ update_ch_record(C),
+ unchanged;
+ {Blocked, Unblocked} ->
+ BlockedQ1 = priority_queue:from_list(Blocked),
+ UnblockedQ = priority_queue:from_list(Unblocked),
+ update_ch_record(C#cr{blocked_consumers = BlockedQ1}),
+ {unblocked,
+ State#state{consumers = priority_queue:join(Consumers, UnblockedQ),
+ use = update_use(Use, active)}}
+ end.
+
+-spec resume_fun() -> cr_fun().
+
+%% Channel-record updater that resumes a suspended limiter (for use
+%% with possibly_unblock/3).
+resume_fun() ->
+ fun (C = #cr{limiter = Limiter}) ->
+ C#cr{limiter = rabbit_limiter:resume(Limiter)}
+ end.
+
+-spec notify_sent_fun(non_neg_integer()) -> cr_fun().
+
+%% Channel-record updater crediting back Credit unsent messages after
+%% the writer confirms sends.
+notify_sent_fun(Credit) ->
+ fun (C = #cr{unsent_message_count = Count}) ->
+ C#cr{unsent_message_count = Count - Credit}
+ end.
+
+-spec activate_limit_fun() -> cr_fun().
+
+%% Channel-record updater that activates the limiter.
+activate_limit_fun() ->
+ fun (C = #cr{limiter = Limiter}) ->
+ C#cr{limiter = rabbit_limiter:activate(Limiter)}
+ end.
+
+-spec credit(boolean(), integer(), boolean(), ch(), rabbit_types:ctag(),
+ state()) -> 'unchanged' | {'unblocked', state()}.
+
+%% Handle a basic.credit from the client. Unblocks consumers only when
+%% the channel itself is unblocked AND this tag was blocked before but
+%% is not any more.
+credit(IsEmpty, Credit, Drain, ChPid, CTag, State) ->
+ case lookup_ch(ChPid) of
+ not_found ->
+ unchanged;
+ #cr{limiter = Limiter} = C ->
+ C1 = #cr{limiter = Limiter1} =
+ credit_and_drain(C, CTag, Credit, drain_mode(Drain), IsEmpty),
+ case is_ch_blocked(C1) orelse
+ (not rabbit_limiter:is_consumer_blocked(Limiter, CTag)) orelse
+ rabbit_limiter:is_consumer_blocked(Limiter1, CTag) of
+ true -> update_ch_record(C1),
+ unchanged;
+ false -> unblock(C1, State)
+ end
+ end.
+
+%% Map the wire-level drain boolean onto the limiter's mode atoms.
+drain_mode(true) -> drain;
+drain_mode(false) -> manual.
+
+-spec utilisation(state()) -> ratio().
+
+%% Current consumer-utilisation ratio, folding the time elapsed in the
+%% present active/inactive period into the stored moving average.
+utilisation(#state{use = {active, Since, Avg}}) ->
+ use_avg(erlang:monotonic_time(micro_seconds) - Since, 0, Avg);
+utilisation(#state{use = {inactive, Since, Active, Avg}}) ->
+ use_avg(Active, erlang:monotonic_time(micro_seconds) - Since, Avg).
+
+%% Does the {ChPid, #consumer{}} entry denote this channel/ctag pair?
+is_same(ChPid, ConsumerTag, {ChPid, #consumer{tag = ConsumerTag}}) ->
+ true;
+is_same(_ChPid, _ConsumerTag, _Consumer) ->
+ false.
+
+%% Peek the highest-priority consumer entry without removing it.
+get_consumer(#state{consumers = Consumers}) ->
+ case priority_queue:out_p(Consumers) of
+ {{value, Consumer, _Priority}, _Tail} -> Consumer;
+ {empty, _} -> undefined
+ end.
+
+-spec get(ch(), rabbit_types:ctag(), state()) -> undefined | consumer().
+
+%% Find the (unblocked) consumer entry for a channel/ctag pair.
+get(ChPid, ConsumerTag, #state{consumers = Consumers}) ->
+ Consumers1 = priority_queue:filter(fun ({CP, #consumer{tag = CT}}) ->
+ (CP == ChPid) and (CT == ConsumerTag)
+ end, Consumers),
+ case priority_queue:out_p(Consumers1) of
+ {empty, _} -> undefined;
+ {{value, Consumer, _Priority}, _Tail} -> Consumer
+ end.
+
+-spec get_infos(consumer()) -> term().
+
+%% Projection of a consumer record: {Tag, AckRequired, Prefetch, Args}.
+get_infos(Consumer) ->
+ {Consumer#consumer.tag,Consumer#consumer.ack_required,
+ Consumer#consumer.prefetch, Consumer#consumer.args}.
+
+-spec consumer_tag(consumer()) -> rabbit_types:ctag().
+
+%% Accessor for the consumer tag.
+consumer_tag(#consumer{tag = CTag}) ->
+ CTag.
+
+
+
+%%----------------------------------------------------------------------------
+
+%% Extract {Credit, Mode} from an x-credit consumer argument table;
+%% malformed or absent tables fall back to {Default, auto} (i.e. plain
+%% prefetch semantics).
+parse_credit_args(Default, Args) ->
+ case rabbit_misc:table_lookup(Args, <<"x-credit">>) of
+ {table, T} -> case {rabbit_misc:table_lookup(T, <<"credit">>),
+ rabbit_misc:table_lookup(T, <<"drain">>)} of
+ {{long, C}, {bool, D}} -> {C, drain_mode(D)};
+ _ -> {Default, auto}
+ end;
+ undefined -> {Default, auto}
+ end.
+
+%% Fetch a channel's #cr{} record from the process dictionary.
+lookup_ch(ChPid) ->
+ case get({ch, ChPid}) of
+ undefined -> not_found;
+ C -> C
+ end.
+
+%% Fetch-or-create a channel record; creation monitors the channel
+%% process and initialises an empty limiter client.
+ch_record(ChPid, LimiterPid) ->
+ Key = {ch, ChPid},
+ case get(Key) of
+ undefined -> MonitorRef = erlang:monitor(process, ChPid),
+ Limiter = rabbit_limiter:client(LimiterPid),
+ C = #cr{ch_pid = ChPid,
+ monitor_ref = MonitorRef,
+ acktags = ?QUEUE:new(),
+ consumer_count = 0,
+ blocked_consumers = priority_queue:new(),
+ limiter = Limiter,
+ unsent_message_count = 0},
+ put(Key, C),
+ C;
+ C = #cr{} -> C
+ end.
+
+%% Store the channel record back — or garbage-collect it entirely once
+%% it has no consumers, no pending acks and no unsent messages.
+update_ch_record(C = #cr{consumer_count = ConsumerCount,
+ acktags = ChAckTags,
+ unsent_message_count = UnsentMessageCount}) ->
+ case {?QUEUE:is_empty(ChAckTags), ConsumerCount, UnsentMessageCount} of
+ {true, 0, 0} -> ok = erase_ch_record(C);
+ _ -> ok = store_ch_record(C)
+ end,
+ C.
+
+store_ch_record(C = #cr{ch_pid = ChPid}) ->
+ put({ch, ChPid}, C),
+ ok.
+
+%% Drop the record and its process monitor.
+erase_ch_record(#cr{ch_pid = ChPid, monitor_ref = MonitorRef}) ->
+ erlang:demonitor(MonitorRef),
+ erase({ch, ChPid}),
+ ok.
+
+%% All channel records currently in the process dictionary.
+all_ch_record() -> [C || {{ch, _}, C} <- get()].
+
+%% Park a consumer entry on its channel's blocked queue.
+block_consumer(C = #cr{blocked_consumers = Blocked}, QEntry) ->
+ update_ch_record(C#cr{blocked_consumers = add_consumer(QEntry, Blocked)}).
+
+%% A channel is blocked when internal flow control is saturated or its
+%% limiter is suspended.
+is_ch_blocked(#cr{unsent_message_count = Count, limiter = Limiter}) ->
+ Count >= ?UNSENT_MESSAGE_LIMIT orelse rabbit_limiter:is_suspended(Limiter).
+
+%% Notify the channel of any consumer tags whose credit was drained;
+%% no-op when the limiter reports nothing drained.
+send_drained(C = #cr{ch_pid = ChPid, limiter = Limiter}) ->
+ case rabbit_limiter:drained(Limiter) of
+ {[], Limiter} -> C;
+ {CTagCredit, Limiter2} -> rabbit_channel:send_drained(
+ ChPid, CTagCredit),
+ C#cr{limiter = Limiter2}
+ end.
+
+%% Apply credit to one consumer tag; when the limiter says the credit
+%% drained immediately (queue empty in drain mode), tell the channel.
+credit_and_drain(C = #cr{ch_pid = ChPid, limiter = Limiter},
+ CTag, Credit, Mode, IsEmpty) ->
+ case rabbit_limiter:credit(Limiter, CTag, Credit, Mode, IsEmpty) of
+ {true, Limiter1} -> rabbit_channel:send_drained(ChPid,
+ [{CTag, Credit}]),
+ C#cr{limiter = Limiter1};
+ {false, Limiter1} -> C#cr{limiter = Limiter1}
+ end.
+
+%% Consumer tags from a priority_queue:to_list/1 result.
+tags(CList) -> [CTag || {_P, {_ChPid, #consumer{tag = CTag}}} <- CList].
+
+%% Enqueue a consumer at its x-priority (0 when absent or untyped).
+add_consumer({ChPid, Consumer = #consumer{args = Args}}, Queue) ->
+ Priority = case rabbit_misc:table_lookup(Args, <<"x-priority">>) of
+ {_, P} -> P;
+ _ -> 0
+ end,
+ priority_queue:in({ChPid, Consumer}, Priority, Queue).
+
+%% Drop the entry matching this channel AND consumer tag.
+remove_consumer(ChPid, CTag, Queue) ->
+ priority_queue:filter(fun ({CP, #consumer{tag = CT}}) ->
+ (CP /= ChPid) or (CT /= CTag)
+ end, Queue).
+
+%% Drop every entry belonging to this channel.
+remove_consumers(ChPid, Queue) ->
+ priority_queue:filter(chan_pred(ChPid, false), Queue).
+
+%% Predicate selecting (Want = true) or rejecting (Want = false) the
+%% entries of one channel.
+chan_pred(ChPid, Want) ->
+ fun ({CP, _Consumer}) when CP =:= ChPid -> Want;
+ (_) -> not Want
+ end.
+
+%% Utilisation state machine: {active, Since, Avg} or
+%% {inactive, Since, ActiveDuration, Avg}. Transitions fold the period
+%% just ended into the moving average; same-state updates are no-ops.
+update_use({inactive, _, _, _} = CUInfo, inactive) ->
+ CUInfo;
+update_use({active, _, _} = CUInfo, active) ->
+ CUInfo;
+update_use({active, Since, Avg}, inactive) ->
+ Now = erlang:monotonic_time(micro_seconds),
+ {inactive, Now, Now - Since, Avg};
+update_use({inactive, Since, Active, Avg}, active) ->
+ Now = erlang:monotonic_time(micro_seconds),
+ {active, Now, use_avg(Active, Now - Since, Avg)}.
+
+%% Exponential moving average of the active fraction over the period;
+%% a zero-length period leaves the average untouched.
+use_avg(0, 0, Avg) ->
+ Avg;
+use_avg(Active, Inactive, Avg) ->
+ Time = Inactive + Active,
+ rabbit_misc:moving_average(Time, ?USE_AVG_HALF_LIFE, Active / Time, Avg).
diff --git a/deps/rabbit/src/rabbit_queue_decorator.erl b/deps/rabbit/src/rabbit_queue_decorator.erl
new file mode 100644
index 0000000000..cbb50456c1
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_decorator.erl
@@ -0,0 +1,72 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_decorator).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([select/1, set/1, register/2, unregister/1]).
+
+-behaviour(rabbit_registry_class).
+
+-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).
+
+%%----------------------------------------------------------------------------
+
+-callback startup(amqqueue:amqqueue()) -> 'ok'.
+
+-callback shutdown(amqqueue:amqqueue()) -> 'ok'.
+
+-callback policy_changed(amqqueue:amqqueue(), amqqueue:amqqueue()) ->
+ 'ok'.
+
+-callback active_for(amqqueue:amqqueue()) -> boolean().
+
+%% called with Queue, MaxActivePriority, IsEmpty
+-callback consumer_state_changed(
+ amqqueue:amqqueue(), integer(), boolean()) -> 'ok'.
+
+%%----------------------------------------------------------------------------
+
+%% rabbit_registry_class callbacks — no action needed on (de)registration.
+added_to_rabbit_registry(_Type, _ModuleName) -> ok.
+removed_from_rabbit_registry(_Type) -> ok.
+
+%% Keep only decorator modules that are actually loadable.
+select(Modules) ->
+ [M || M <- Modules, code:which(M) =/= non_existing].
+
+%% Recompute and store the set of decorators active for this queue.
+set(Q) when ?is_amqqueue(Q) ->
+ Decorators = [D || D <- list(), D:active_for(Q)],
+ amqqueue:set_decorators(Q, Decorators).
+
+%% All registered queue-decorator modules.
+list() -> [M || {_, M} <- rabbit_registry:lookup_all(queue_decorator)].
+
+%% Register a decorator, then re-evaluate every queue so newly-active
+%% decorators get their startup callback.
+register(TypeName, ModuleName) ->
+ rabbit_registry:register(queue_decorator, TypeName, ModuleName),
+ [maybe_recover(Q) || Q <- rabbit_amqqueue:list()],
+ ok.
+
+%% Unregister a decorator and re-evaluate every queue's decorator set.
+unregister(TypeName) ->
+ rabbit_registry:unregister(queue_decorator, TypeName),
+ [maybe_recover(Q) || Q <- rabbit_amqqueue:list()],
+ ok.
+
+%% Re-run decorator selection for one queue; when the active set
+%% changed, start the newly-added decorators and persist the change.
+maybe_recover(Q0) when ?is_amqqueue(Q0) ->
+ Name = amqqueue:get_name(Q0),
+ Decs0 = amqqueue:get_decorators(Q0),
+ Q1 = set(Q0),
+ Decs1 = amqqueue:get_decorators(Q1),
+ Old = lists:sort(select(Decs0)),
+ New = lists:sort(select(Decs1)),
+ case New of
+ Old ->
+ ok;
+ _ ->
+ %% TODO LRB JSP 160169569 should startup be passed Q1 here?
+ [M:startup(Q0) || M <- New -- Old],
+ rabbit_amqqueue:update_decorators(Name)
+ end.
diff --git a/deps/rabbit/src/rabbit_queue_index.erl b/deps/rabbit/src/rabbit_queue_index.erl
new file mode 100644
index 0000000000..faab4380b5
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_index.erl
@@ -0,0 +1,1521 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_index).
+
+-export([erase/1, init/3, reset_state/1, recover/6,
+ terminate/3, delete_and_terminate/1,
+ pre_publish/7, flush_pre_publish_cache/2,
+ publish/6, deliver/2, ack/2, sync/1, needs_sync/1, flush/1,
+ read/3, next_segment_boundary/1, bounds/1, start/2, stop/1]).
+
+-export([add_queue_ttl/0, avoid_zeroes/0, store_msg_size/0, store_msg/0]).
+-export([scan_queue_segments/3, scan_queue_segments/4]).
+
+%% Migrates from global to per-vhost message stores
+-export([move_to_per_vhost_stores/1,
+ update_recovery_term/2,
+ read_global_recovery_terms/1,
+ cleanup_global_recovery_terms/0]).
+
+-define(CLEAN_FILENAME, "clean.dot").
+
+%%----------------------------------------------------------------------------
+
+%% The queue index is responsible for recording the order of messages
+%% within a queue on disk. As such it contains records of messages
+%% being published, delivered and acknowledged. The publish record
+%% includes the sequence ID, message ID and a small quantity of
+%% metadata about the message; the delivery and acknowledgement
+%% records just contain the sequence ID. A publish record may also
+%% contain the complete message if provided to publish/5; this allows
+%% the message store to be avoided altogether for small messages. In
+%% either case the publish record is stored in memory in the same
+%% serialised format it will take on disk.
+%%
+%% Because of the fact that the queue can decide at any point to send
+%% a queue entry to disk, you can not rely on publishes appearing in
+%% order. The only thing you can rely on is a message being published,
+%% then delivered, then ack'd.
+%%
+%% In order to be able to clean up ack'd messages, we write to segment
+%% files. These files have a fixed number of entries: ?SEGMENT_ENTRY_COUNT
+%% publishes, delivers and acknowledgements. They are numbered, and so
+%% it is known that the 0th segment contains messages 0 ->
+%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages
+%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. As
+%% such, in the segment files, we only refer to message sequence ids
+%% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a
+%% fixed size.
+%%
+%% However, transient messages which are not sent to disk at any point
+%% will cause gaps to appear in segment files. Therefore, we delete a
+%% segment file whenever the number of publishes == number of acks
+%% (note that although it is not fully enforced, it is assumed that a
+%% message will never be ack'd before it is delivered, thus this test
+%% also implies == number of delivers). In practice, this does not
+%% cause disk churn in the pathological case because of the journal
+%% and caching (see below).
+%%
+%% Because of the fact that publishes, delivers and acks can occur all
+%% over, we wish to avoid lots of seeking. Therefore we have a fixed
+%% sized journal to which all actions are appended. When the number of
+%% entries in this journal reaches max_journal_entries, the journal
+%% entries are scattered out to their relevant files, and the journal
+%% is truncated to zero size. Note that entries in the journal must
+%% carry the full sequence id, thus the format of entries in the
+%% journal is different to that in the segments.
+%%
+%% The journal is also kept fully in memory, pre-segmented: the state
+%% contains a mapping from segment numbers to state-per-segment (this
+%% state is held for all segments which have been "seen": thus a
+%% segment which has been read but has no pending entries in the
+%% journal is still held in this mapping. Also note that a map is
+%% used for this mapping, not an array because with an array, you will
+%% always have entries from 0). Actions are stored directly in this
+%% state. Thus at the point of flushing the journal, firstly no
+%% reading from disk is necessary, but secondly if the known number of
+%% acks and publishes in a segment are equal, given the known state of
+%% the segment file combined with the journal, no writing needs to be
+%% done to the segment file either (in fact it is deleted if it exists
+%% at all). This is safe given that the set of acks is a subset of the
+%% set of publishes. When it is necessary to sync messages, it is
+%% sufficient to fsync on the journal: when entries are distributed
+%% from the journal to segment files, those segments appended to are
+%% fsync'd prior to the journal being truncated.
+%%
+%% This module is also responsible for scanning the queue index files
+%% and seeding the message store on start up.
+%%
+%% Note that in general, the representation of a message's state as
+%% the tuple: {('no_pub'|{IsPersistent, Bin, MsgBin}),
+%% ('del'|'no_del'), ('ack'|'no_ack')} is richer than strictly
+%% necessary for most operations. However, for startup, and to ensure
+%% the safe and correct combination of journal entries with entries
+%% read from the segment on disk, this richer representation vastly
+%% simplifies and clarifies the code.
+%%
+%% For notes on Clean Shutdown and startup, see documentation in
+%% rabbit_variable_queue.
+%%
+%%----------------------------------------------------------------------------
+
+%% ---- Journal details ----
+
+-define(JOURNAL_FILENAME, "journal.jif").
+-define(QUEUE_NAME_STUB_FILE, ".queue_name").
+
+-define(PUB_PERSIST_JPREFIX, 2#00).
+-define(PUB_TRANS_JPREFIX, 2#01).
+-define(DEL_JPREFIX, 2#10).
+-define(ACK_JPREFIX, 2#11).
+-define(JPREFIX_BITS, 2).
+-define(SEQ_BYTES, 8).
+-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)).
+
+%% ---- Segment details ----
+
+-define(SEGMENT_EXTENSION, ".idx").
+
+%% TODO: The segment size would be configurable, but deriving all the
+%% other values is quite hairy and quite possibly noticeably less
+%% efficient, depending on how clever the compiler is when it comes to
+%% binary generation/matching with constant vs variable lengths.
+
+-define(REL_SEQ_BITS, 14).
+%% calculated as trunc(math:pow(2,?REL_SEQ_BITS))).
+-define(SEGMENT_ENTRY_COUNT, 16384).
+
+%% seq only is binary 01 followed by 14 bits of rel seq id
+%% (range: 0 - 16383)
+-define(REL_SEQ_ONLY_PREFIX, 01).
+-define(REL_SEQ_ONLY_PREFIX_BITS, 2).
+-define(REL_SEQ_ONLY_RECORD_BYTES, 2).
+
+%% publish record is binary 1 followed by a bit for is_persistent,
+%% then 14 bits of rel seq id, 64 bits for message expiry, 32 bits of
+%% size and then 128 bits of md5sum msg id.
+-define(PUB_PREFIX, 1).
+-define(PUB_PREFIX_BITS, 1).
+
+-define(EXPIRY_BYTES, 8).
+-define(EXPIRY_BITS, (?EXPIRY_BYTES * 8)).
+-define(NO_EXPIRY, 0).
+
+-define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes
+-define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)).
+
+%% This is the size of the message body content, for stats
+-define(SIZE_BYTES, 4).
+-define(SIZE_BITS, (?SIZE_BYTES * 8)).
+
+%% This is the size of the message record embedded in the queue
+%% index. If 0, the message can be found in the message store.
+-define(EMBEDDED_SIZE_BYTES, 4).
+-define(EMBEDDED_SIZE_BITS, (?EMBEDDED_SIZE_BYTES * 8)).
+
+%% 16 bytes for md5sum + 8 for expiry
+-define(PUB_RECORD_BODY_BYTES, (?MSG_ID_BYTES + ?EXPIRY_BYTES + ?SIZE_BYTES)).
+%% + 4 for size
+-define(PUB_RECORD_SIZE_BYTES, (?PUB_RECORD_BODY_BYTES + ?EMBEDDED_SIZE_BYTES)).
+
+%% + 2 for seq, bits and prefix
+-define(PUB_RECORD_PREFIX_BYTES, 2).
+
+%% ---- misc ----
+
+-define(PUB, {_, _, _}). %% {IsPersistent, Bin, MsgBin}
+
+-define(READ_MODE, [binary, raw, read]).
+-define(WRITE_MODE, [write | ?READ_MODE]).
+
+%%----------------------------------------------------------------------------
+
+-record(qistate, {
+ %% queue directory where segment and journal files are stored
+ dir,
+ %% map of #segment records
+ segments,
+ %% journal file handle obtained from/used by file_handle_cache
+ journal_handle,
+ %% how many not yet flushed entries are there
+ dirty_count,
+ %% this many not yet flushed journal entries will force a flush
+ max_journal_entries,
+ %% callback function invoked when a message is "handled"
+ %% by the index and potentially can be confirmed to the publisher
+ on_sync,
+ on_sync_msg,
+ %% set of IDs of unconfirmed [to publishers] messages
+ unconfirmed,
+ unconfirmed_msg,
+ %% optimisation
+ pre_publish_cache,
+ %% optimisation
+ delivered_cache,
+ %% queue name resource record
+ queue_name}).
+
+-record(segment, {
+ %% segment ID (an integer)
+ num,
+ %% segment file path (see also ?SEGMENT_EXTENSION)
+ path,
+ %% index operation log entries in this segment
+ journal_entries,
+ entries_to_segment,
+ %% counter of unacknowledged messages
+ unacked
+}).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-rabbit_upgrade({add_queue_ttl, local, []}).
+-rabbit_upgrade({avoid_zeroes, local, [add_queue_ttl]}).
+-rabbit_upgrade({store_msg_size, local, [avoid_zeroes]}).
+-rabbit_upgrade({store_msg, local, [store_msg_size]}).
+
+-type hdl() :: ('undefined' | any()).
+-type segment() :: ('undefined' |
+ #segment { num :: non_neg_integer(),
+ path :: file:filename(),
+ journal_entries :: array:array(),
+ entries_to_segment :: array:array(),
+ unacked :: non_neg_integer()
+ }).
+-type seq_id() :: integer().
+-type seg_map() :: {map(), [segment()]}.
+-type on_sync_fun() :: fun ((gb_sets:set()) -> ok).
+-type qistate() :: #qistate { dir :: file:filename(),
+ segments :: 'undefined' | seg_map(),
+ journal_handle :: hdl(),
+ dirty_count :: integer(),
+ max_journal_entries :: non_neg_integer(),
+ on_sync :: on_sync_fun(),
+ on_sync_msg :: on_sync_fun(),
+ unconfirmed :: gb_sets:set(),
+ unconfirmed_msg :: gb_sets:set(),
+ pre_publish_cache :: list(),
+ delivered_cache :: list()
+ }.
+-type contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean()).
+-type walker(A) :: fun ((A) -> 'finished' |
+ {rabbit_types:msg_id(), non_neg_integer(), A}).
+-type shutdown_terms() :: [term()] | 'non_clean_shutdown'.
+
+%%----------------------------------------------------------------------------
+%% public API
+%%----------------------------------------------------------------------------
+
+-spec erase(rabbit_amqqueue:name()) -> 'ok'.
+
+%% Delete the on-disk index directory belonging to the named queue.
+erase(#resource{ virtual_host = VHost } = Name) ->
+    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+    #qistate { dir = Dir } = blank_state(VHostDir, Name),
+    erase_index_dir(Dir).
+
+%% used during variable queue purge when there are no pending acks
+
+-spec reset_state(qistate()) -> qistate().
+
+%% Close the journal handle (if open), wipe the index directory, and
+%% return a fresh blank state that keeps the queue name, directory and
+%% the two on-sync callbacks from the old state.
+reset_state(#qistate{ queue_name     = Name,
+                      dir            = Dir,
+                      on_sync        = OnSyncFun,
+                      on_sync_msg    = OnSyncMsgFun,
+                      journal_handle = JournalHdl }) ->
+    ok = case JournalHdl of
+             undefined -> ok;
+             _         -> file_handle_cache:close(JournalHdl)
+         end,
+    ok = erase_index_dir(Dir),
+    blank_state_name_dir_funs(Name, Dir, OnSyncFun, OnSyncMsgFun).
+
+-spec init(rabbit_amqqueue:name(),
+           on_sync_fun(), on_sync_fun()) -> qistate().
+
+%% Initialise a brand-new (empty) index. Asserts that the queue's
+%% directory does not yet exist on disk.
+init(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) ->
+    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+    State = #qistate { dir = Dir } = blank_state(VHostDir, Name),
+    false = rabbit_file:is_file(Dir), %% is_file == is file or dir
+    State#qistate{on_sync     = OnSyncFun,
+                  on_sync_msg = OnSyncMsgFun}.
+
+-spec recover(rabbit_amqqueue:name(), shutdown_terms(), boolean(),
+              contains_predicate(),
+              on_sync_fun(), on_sync_fun()) ->
+                  {'undefined' | non_neg_integer(),
+                   'undefined' | non_neg_integer(), qistate()}.
+
+%% Recover an index after restart. Only when the shutdown was clean
+%% AND the message store recovered do we trust the persisted
+%% per-segment counts (init_clean/2); otherwise every segment must be
+%% rescanned and reconciled with the journal (init_dirty/3).
+recover(#resource{ virtual_host = VHost } = Name, Terms, MsgStoreRecovered,
+        ContainsCheckFun, OnSyncFun, OnSyncMsgFun) ->
+    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+    State = blank_state(VHostDir, Name),
+    State1 = State #qistate{on_sync     = OnSyncFun,
+                            on_sync_msg = OnSyncMsgFun},
+    CleanShutdown = Terms /= non_clean_shutdown,
+    case CleanShutdown andalso MsgStoreRecovered of
+        true  -> RecoveredCounts = proplists:get_value(segments, Terms, []),
+                 init_clean(RecoveredCounts, State1);
+        false -> init_dirty(CleanShutdown, ContainsCheckFun, State1)
+    end.
+
+-spec terminate(rabbit_types:vhost(), [any()], qistate()) -> qistate().
+
+%% Clean shutdown: close handles via terminate/1, then persist the
+%% per-segment unacked counts (plus the caller-supplied Terms) via
+%% rabbit_recovery_terms so the next start can recover cleanly.
+terminate(VHost, Terms, State = #qistate { dir = Dir }) ->
+    {SegmentCounts, State1} = terminate(State),
+    rabbit_recovery_terms:store(VHost, filename:basename(Dir),
+                                [{segments, SegmentCounts} | Terms]),
+    State1.
+
+-spec delete_and_terminate(qistate()) -> qistate().
+
+%% Shutdown that additionally removes the queue's index directory.
+delete_and_terminate(State) ->
+    {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State),
+    ok = rabbit_file:recursive_delete([Dir]),
+    State1.
+
+%% Batched variant of publish/6: accumulate the serialised journal
+%% record in pre_publish_cache (and the SeqId in delivered_cache if
+%% already delivered) instead of appending to the journal file
+%% immediately; the caches are flushed in one append later.
+pre_publish(MsgOrId, SeqId, MsgProps, IsPersistent, IsDelivered, JournalSizeHint,
+            State = #qistate{pre_publish_cache = PPC,
+                             delivered_cache   = DC}) ->
+    State1 = maybe_needs_confirming(MsgProps, MsgOrId, State),
+
+    {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps),
+
+    %% journal-format publish record: prefix encodes persistence
+    PPC1 =
+        [[<<(case IsPersistent of
+                true  -> ?PUB_PERSIST_JPREFIX;
+                false -> ?PUB_TRANS_JPREFIX
+            end):?JPREFIX_BITS,
+            SeqId:?SEQ_BITS, Bin/binary,
+            (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin] | PPC],
+
+    DC1 =
+        case IsDelivered of
+            true ->
+                [SeqId | DC];
+            false ->
+                DC
+        end,
+
+    State2 = add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1),
+    maybe_flush_pre_publish_cache(
+      JournalSizeHint,
+      State2#qistate{pre_publish_cache = PPC1,
+                     delivered_cache   = DC1}).
+
+%% pre_publish_cache is the entry with most elements when compared to
+%% delivered_cache so we only check the former in the guard.
+maybe_flush_pre_publish_cache(JournalSizeHint,
+                              #qistate{pre_publish_cache = PPC} = State)
+  when length(PPC) >= ?SEGMENT_ENTRY_COUNT ->
+    flush_pre_publish_cache(JournalSizeHint, State);
+maybe_flush_pre_publish_cache(_JournalSizeHint, State) ->
+    State.
+
+%% Flush both caches to the journal, then possibly flush the journal
+%% itself to the segment files (subject to JournalSizeHint).
+flush_pre_publish_cache(JournalSizeHint, State) ->
+    State1 = flush_pre_publish_cache(State),
+    State2 = flush_delivered_cache(State1),
+    maybe_flush_journal(JournalSizeHint, State2).
+
+%% Append the accumulated publish records (oldest first, hence the
+%% reverse) to the journal file in a single write.
+flush_pre_publish_cache(#qistate{pre_publish_cache = []} = State) ->
+    State;
+flush_pre_publish_cache(State = #qistate{pre_publish_cache = PPC}) ->
+    {JournalHdl, State1} = get_journal_handle(State),
+    file_handle_cache_stats:update(queue_index_journal_write),
+    ok = file_handle_cache:append(JournalHdl, lists:reverse(PPC)),
+    State1#qistate{pre_publish_cache = []}.
+
+%% Record 'deliver' entries for all cached SeqIds (oldest first).
+flush_delivered_cache(#qistate{delivered_cache = []} = State) ->
+    State;
+flush_delivered_cache(State = #qistate{delivered_cache = DC}) ->
+    State1 = deliver(lists:reverse(DC), State),
+    State1#qistate{delivered_cache = []}.
+
+-spec publish(rabbit_types:msg_id(), seq_id(),
+              rabbit_types:message_properties(), boolean(),
+              non_neg_integer(), qistate()) -> qistate().
+
+%% Record a single publish: append the serialised record to the
+%% journal file and to the in-memory journal, flushing the journal to
+%% segments if it has grown past JournalSizeHint / max_journal_entries.
+publish(MsgOrId, SeqId, MsgProps, IsPersistent, JournalSizeHint, State) ->
+    {JournalHdl, State1} =
+        get_journal_handle(
+          maybe_needs_confirming(MsgProps, MsgOrId, State)),
+    file_handle_cache_stats:update(queue_index_journal_write),
+    {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps),
+    ok = file_handle_cache:append(
+           JournalHdl, [<<(case IsPersistent of
+                               true  -> ?PUB_PERSIST_JPREFIX;
+                               false -> ?PUB_TRANS_JPREFIX
+                           end):?JPREFIX_BITS,
+                          SeqId:?SEQ_BITS, Bin/binary,
+                          (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin]),
+    maybe_flush_journal(
+      JournalSizeHint,
+      add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1)).
+
+%% If the publisher asked for confirms, track the message id in
+%% 'unconfirmed' (id stored in the message store) or 'unconfirmed_msg'
+%% (message embedded in the index) until the next sync.
+maybe_needs_confirming(MsgProps, MsgOrId,
+                       State = #qistate{unconfirmed     = UC,
+                                        unconfirmed_msg = UCM}) ->
+    MsgId = case MsgOrId of
+                #basic_message{id = Id} -> Id;
+                Id when is_binary(Id)   -> Id
+            end,
+    ?MSG_ID_BYTES = size(MsgId),
+    case {MsgProps#message_properties.needs_confirming, MsgOrId} of
+      {true,  MsgId} -> UC1  = gb_sets:add_element(MsgId, UC),
+                        State#qistate{unconfirmed     = UC1};
+      {true,  _}     -> UCM1 = gb_sets:add_element(MsgId, UCM),
+                        State#qistate{unconfirmed_msg = UCM1};
+      {false, _}     -> State
+    end.
+
+-spec deliver([seq_id()], qistate()) -> qistate().
+
+%% Record 'deliver' journal entries for the given SeqIds.
+deliver(SeqIds, State) ->
+    deliver_or_ack(del, SeqIds, State).
+
+-spec ack([seq_id()], qistate()) -> qistate().
+
+%% Record 'ack' journal entries for the given SeqIds.
+ack(SeqIds, State) ->
+    deliver_or_ack(ack, SeqIds, State).
+
+%% This is called when there are outstanding confirms or when the
+%% queue is idle and the journal needs syncing (see needs_sync/1).
+
+-spec sync(qistate()) -> qistate().
+
+%% fsync the journal (sufficient for durability, see module comment)
+%% and fire the on-sync confirm callbacks.
+sync(State = #qistate { journal_handle = undefined }) ->
+    State;
+sync(State = #qistate { journal_handle = JournalHdl }) ->
+    ok = file_handle_cache:sync(JournalHdl),
+    notify_sync(State).
+
+-spec needs_sync(qistate()) -> 'confirms' | 'other' | 'false'.
+
+%% 'confirms' if publisher confirms are waiting on a sync; 'other' if
+%% only the journal file itself has unsynced writes; 'false' otherwise.
+needs_sync(#qistate{journal_handle = undefined}) ->
+    false;
+needs_sync(#qistate{journal_handle  = JournalHdl,
+                    unconfirmed     = UC,
+                    unconfirmed_msg = UCM}) ->
+    case gb_sets:is_empty(UC) andalso gb_sets:is_empty(UCM) of
+        true  -> case file_handle_cache:needs_sync(JournalHdl) of
+                     true  -> other;
+                     false -> false
+                 end;
+        false -> confirms
+    end.
+
+-spec flush(qistate()) -> qistate().
+
+%% Write pending journal entries out to segment files (no-op if the
+%% journal is empty).
+flush(State = #qistate { dirty_count = 0 }) -> State;
+flush(State)                                -> flush_journal(State).
+
+-spec read(seq_id(), seq_id(), qistate()) ->
+                     {[{rabbit_types:msg_id(), seq_id(),
+                        rabbit_types:message_properties(),
+                        boolean(), boolean()}], qistate()}.
+
+%% Read entries for SeqIds in [Start, End): locate the segments the
+%% range spans and fold over them right-to-left so the result is in
+%% ascending seq-id order.
+read(StartEnd, StartEnd, State) ->
+    {[], State};
+read(Start, End, State = #qistate { segments = Segments,
+                                    dir = Dir }) when Start =< End ->
+    %% Start is inclusive, End is exclusive.
+    LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start),
+    UpperB = {EndSeg,   _EndRelSeq}   = seq_id_to_seg_and_rel_seq_id(End - 1),
+    {Messages, Segments1} =
+        lists:foldr(fun (Seg, Acc) ->
+                            read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir)
+                    end, {[], Segments}, lists:seq(StartSeg, EndSeg)),
+    {Messages, State #qistate { segments = Segments1 }}.
+
+-spec next_segment_boundary(seq_id()) -> seq_id().
+
+%% First seq id of the segment following the one containing SeqId.
+next_segment_boundary(SeqId) ->
+    {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
+    reconstruct_seq_id(Seg + 1, 0).
+
+-spec bounds(qistate()) ->
+                       {non_neg_integer(), non_neg_integer(), qistate()}.
+
+%% Return conservative {LowSeqId, NextSeqId} derived from segment
+%% boundaries only (see comments below); both are 0 for an empty index.
+bounds(State = #qistate { segments = Segments }) ->
+    %% This is not particularly efficient, but only gets invoked on
+    %% queue initialisation.
+    SegNums = lists:sort(segment_nums(Segments)),
+    %% Don't bother trying to figure out the lowest seq_id, merely the
+    %% seq_id of the start of the lowest segment. That seq_id may not
+    %% actually exist, but that's fine. The important thing is that
+    %% the segment exists and the seq_id reported is on a segment
+    %% boundary.
+    %%
+    %% We also don't really care about the max seq_id. Just start the
+    %% next segment: it makes life much easier.
+    %%
+    %% SegNums is sorted, ascending.
+    {LowSeqId, NextSeqId} =
+        case SegNums of
+            []         -> {0, 0};
+            [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0),
+                           reconstruct_seq_id(1 + lists:last(SegNums), 0)}
+        end,
+    {LowSeqId, NextSeqId, State}.
+
+-spec start(rabbit_types:vhost(), [rabbit_amqqueue:name()]) -> {[[any()]], {walker(A), A}}.
+
+%% Vhost-level startup: collect recovery terms for every durable
+%% queue (non_clean_shutdown when terms are missing), delete index
+%% directories of queues we were not asked to recover, and return the
+%% terms plus a walker fun used to seed the message store.
+start(VHost, DurableQueueNames) ->
+    ok = rabbit_recovery_terms:start(VHost),
+    {DurableTerms, DurableDirectories} =
+        lists:foldl(
+          fun(QName, {RecoveryTerms, ValidDirectories}) ->
+                  DirName = queue_name_to_dir_name(QName),
+                  RecoveryInfo = case rabbit_recovery_terms:read(VHost, DirName) of
+                                     {error, _}  -> non_clean_shutdown;
+                                     {ok, Terms} -> Terms
+                                 end,
+                  {[RecoveryInfo | RecoveryTerms],
+                   sets:add_element(DirName, ValidDirectories)}
+          end, {[], sets:new()}, DurableQueueNames),
+    %% Any queue directory we've not been asked to recover is considered garbage
+    rabbit_file:recursive_delete(
+      [DirName ||
+        DirName <- all_queue_directory_names(VHost),
+        not sets:is_element(filename:basename(DirName), DurableDirectories)]),
+    rabbit_recovery_terms:clear(VHost),
+
+    %% The backing queue interface requires that the queue recovery terms
+    %% which come back from start/1 are in the same order as DurableQueueNames
+    OrderedTerms = lists:reverse(DurableTerms),
+    {OrderedTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}.
+
+
+%% Stop the per-vhost recovery terms server.
+stop(VHost) -> rabbit_recovery_terms:stop(VHost).
+
+%% All queue index directories under one vhost's message store dir.
+all_queue_directory_names(VHost) ->
+    filelib:wildcard(filename:join([rabbit_vhost:msg_store_dir_path(VHost),
+                                    "queues", "*"])).
+
+%% As above, but across all vhosts (wildcarded vhost path).
+all_queue_directory_names() ->
+    filelib:wildcard(filename:join([rabbit_vhost:msg_store_dir_wildcard(),
+                                    "queues", "*"])).
+
+%%----------------------------------------------------------------------------
+%% startup and shutdown
+%%----------------------------------------------------------------------------
+
+%% Remove the index directory if it exists; ok if it doesn't.
+erase_index_dir(Dir) ->
+    case rabbit_file:is_dir(Dir) of
+        true  -> rabbit_file:recursive_delete([Dir]);
+        false -> ok
+    end.
+
+%% Fresh state with no-op on-sync callbacks.
+blank_state(VHostDir, QueueName) ->
+    Dir = queue_dir(VHostDir, QueueName),
+    blank_state_name_dir_funs(QueueName,
+                              Dir,
+                              fun (_) -> ok end,
+                              fun (_) -> ok end).
+
+queue_dir(VHostDir, QueueName) ->
+    %% Queue directory is
+    %% {node_database_dir}/msg_stores/vhosts/{vhost}/queues/{queue}
+    QueueDir = queue_name_to_dir_name(QueueName),
+    filename:join([VHostDir, "queues", QueueDir]).
+
+%% Stable directory name: base-36 encoding of the md5 of
+%% "queue" ++ vhost ++ queue name.
+queue_name_to_dir_name(#resource { kind = queue,
+                                   virtual_host = VHost,
+                                   name = QName }) ->
+    <<Num:128>> = erlang:md5(<<"queue", VHost/binary, QName/binary>>),
+    rabbit_misc:format("~.36B", [Num]).
+
+%% Pre-per-vhost-store directory naming scheme (hash of the whole
+%% #resource{} term); kept for migration of old installations.
+queue_name_to_dir_name_legacy(Name = #resource { kind = queue }) ->
+    <<Num:128>> = erlang:md5(term_to_binary_compat:term_to_binary_1(Name)),
+    rabbit_misc:format("~.36B", [Num]).
+
+queues_base_dir() ->
+    rabbit_mnesia:dir().
+
+%% Build an empty #qistate{}; max_journal_entries comes from the
+%% 'queue_index_max_journal_entries' application env of 'rabbit'.
+blank_state_name_dir_funs(Name, Dir, OnSyncFun, OnSyncMsgFun) ->
+    {ok, MaxJournal} =
+        application:get_env(rabbit, queue_index_max_journal_entries),
+    #qistate { dir                 = Dir,
+               segments            = segments_new(),
+               journal_handle      = undefined,
+               dirty_count         = 0,
+               max_journal_entries = MaxJournal,
+               on_sync             = OnSyncFun,
+               on_sync_msg         = OnSyncMsgFun,
+               unconfirmed         = gb_sets:new(),
+               unconfirmed_msg     = gb_sets:new(),
+               pre_publish_cache   = [],
+               delivered_cache     = [],
+               queue_name          = Name }.
+
+%% Clean recovery: replay the journal, then overlay the per-segment
+%% unacked counts that were persisted at shutdown. Returns
+%% {undefined, undefined, State} as the counts cannot be trusted for
+%% the caller's purposes (see final comment).
+init_clean(RecoveredCounts, State) ->
+    %% Load the journal. Since this is a clean recovery this (almost)
+    %% gets us back to where we were on shutdown.
+    State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State),
+    %% The journal loading only creates records for segments touched
+    %% by the journal, and the counts are based on the journal entries
+    %% only. We need *complete* counts for *all* segments. By an
+    %% amazing coincidence we stored that information on shutdown.
+    Segments1 =
+        lists:foldl(
+          fun ({Seg, UnackedCount}, SegmentsN) ->
+                  Segment = segment_find_or_new(Seg, Dir, SegmentsN),
+                  segment_store(Segment #segment { unacked = UnackedCount },
+                                SegmentsN)
+          end, Segments, RecoveredCounts),
+    %% the counts above include transient messages, which would be the
+    %% wrong thing to return
+    {undefined, undefined, State1 # qistate { segments = Segments1 }}.
+
+%% Dirty recovery: replay the journal, then rescan every segment,
+%% reconciling against the message store via ContainsCheckFun.
+%% Returns {UnackedCount, UnackedBytes, State}. NOTE(review): the
+%% fun's DirtyCount parameter shadows the outer DirtyCount binding —
+%% intentional here, but it triggers a compiler shadow warning.
+init_dirty(CleanShutdown, ContainsCheckFun, State) ->
+    %% Recover the journal completely. This will also load segments
+    %% which have entries in the journal and remove duplicates. The
+    %% counts will correctly reflect the combination of the segment
+    %% and the journal.
+    State1 = #qistate { dir = Dir, segments = Segments } =
+        recover_journal(State),
+    {Segments1, Count, Bytes, DirtyCount} =
+        %% Load each segment in turn and filter out messages that are
+        %% not in the msg_store, by adding acks to the journal. These
+        %% acks only go to the RAM journal as it doesn't matter if we
+        %% lose them. Also mark delivered if not clean shutdown. Also
+        %% find the number of unacked messages. Also accumulate the
+        %% dirty count here, so we can call maybe_flush_journal below
+        %% and avoid unnecessary file system operations.
+        lists:foldl(
+          fun (Seg, {Segments2, CountAcc, BytesAcc, DirtyCount}) ->
+                  {{Segment = #segment { unacked = UnackedCount }, Dirty},
+                   UnackedBytes} =
+                      recover_segment(ContainsCheckFun, CleanShutdown,
+                                      segment_find_or_new(Seg, Dir, Segments2),
+                                      State1#qistate.max_journal_entries),
+                  {segment_store(Segment, Segments2),
+                   CountAcc + UnackedCount,
+                   BytesAcc + UnackedBytes, DirtyCount + Dirty}
+          end, {Segments, 0, 0, 0}, all_segment_nums(State1)),
+    State2 = maybe_flush_journal(State1 #qistate { segments = Segments1,
+                                                   dirty_count = DirtyCount }),
+    {Count, Bytes, State2}.
+
+%% Close the journal handle and collect {SegNum, UnackedCount} pairs
+%% for persistence by terminate/3; segments are dropped from state.
+terminate(State = #qistate { journal_handle = JournalHdl,
+                             segments = Segments }) ->
+    ok = case JournalHdl of
+             undefined -> ok;
+             _         -> file_handle_cache:close(JournalHdl)
+         end,
+    SegmentCounts =
+        segment_fold(
+          fun (#segment { num = Seg, unacked = UnackedCount }, Acc) ->
+                  [{Seg, UnackedCount} | Acc]
+          end, [], Segments),
+    {SegmentCounts, State #qistate { journal_handle = undefined,
+                                     segments = undefined }}.
+
+%% Merge one segment with its journal entries and fold over the
+%% still-unacked entries, repairing each via recover_message/6.
+%% Returns {{Segment, DirtyCount}, UnackedBytes}; bytes only count
+%% persistent messages.
+recover_segment(ContainsCheckFun, CleanShutdown,
+                Segment = #segment { journal_entries = JEntries }, MaxJournal) ->
+    {SegEntries, UnackedCount} = load_segment(false, Segment),
+    {SegEntries1, UnackedCountDelta} =
+        segment_plus_journal(SegEntries, JEntries),
+    array:sparse_foldl(
+      fun (RelSeq, {{IsPersistent, Bin, MsgBin}, Del, no_ack),
+           {SegmentAndDirtyCount, Bytes}) ->
+              {MsgOrId, MsgProps} = parse_pub_record_body(Bin, MsgBin),
+              {recover_message(ContainsCheckFun(MsgOrId), CleanShutdown,
+                               Del, RelSeq, SegmentAndDirtyCount, MaxJournal),
+               Bytes + case IsPersistent of
+                           true  -> MsgProps#message_properties.size;
+                           false -> 0
+                       end}
+      end,
+      {{Segment #segment { unacked = UnackedCount + UnackedCountDelta }, 0}, 0},
+      SegEntries1).
+
+%% Args: (InMsgStore, CleanShutdown, Del, RelSeq, {Segment, Dirty}, MaxJournal).
+%% Message present + clean shutdown: nothing to do.
+recover_message( true,  true,   _Del, _RelSeq, SegmentAndDirtyCount, _MaxJournal) ->
+    SegmentAndDirtyCount;
+%% Present, dirty shutdown, already marked delivered: nothing to do.
+recover_message( true, false,    del, _RelSeq, SegmentAndDirtyCount, _MaxJournal) ->
+    SegmentAndDirtyCount;
+%% Present, dirty shutdown, not delivered: mark delivered and force a
+%% journal flush by returning a dirty count above MaxJournal.
+recover_message( true, false, no_del,  RelSeq, {Segment, _DirtyCount}, MaxJournal) ->
+    %% force to flush the segment
+    {add_to_journal(RelSeq, del, Segment), MaxJournal + 1};
+%% Missing from the msg store: ack it away (adding del first if needed).
+recover_message(false,     _,    del,  RelSeq, {Segment, DirtyCount}, _MaxJournal) ->
+    {add_to_journal(RelSeq, ack, Segment), DirtyCount + 1};
+recover_message(false,     _, no_del,  RelSeq, {Segment, DirtyCount}, _MaxJournal) ->
+    {add_to_journal(RelSeq, ack,
+                    add_to_journal(RelSeq, del, Segment)),
+     DirtyCount + 2}.
+
+%%----------------------------------------------------------------------------
+%% msg store startup delta function
+%%----------------------------------------------------------------------------
+
+%% Walker used to seed the message store on startup: fan out one
+%% worker-pool job per durable queue, each scanning its segments and
+%% feeding {MsgId, 1} pairs into a gatherer; then stream results out.
+queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) ->
+    {ok, Gatherer} = gatherer:start_link(),
+    [begin
+         ok = gatherer:fork(Gatherer),
+         ok = worker_pool:submit_async(
+                fun () -> link(Gatherer),
+                          ok = queue_index_walker_reader(QueueName, Gatherer),
+                          unlink(Gatherer),
+                          ok
+                end)
+     end || QueueName <- DurableQueues],
+    queue_index_walker({next, Gatherer});
+
+%% Pull the next {MsgId, Count} from the gatherer; 'finished' when all
+%% readers are done and the gatherer drains empty.
+queue_index_walker({next, Gatherer}) when is_pid(Gatherer) ->
+    case gatherer:out(Gatherer) of
+        empty ->
+            ok = gatherer:stop(Gatherer),
+            finished;
+        {value, {MsgId, Count}} ->
+            {MsgId, Count, {next, Gatherer}}
+    end.
+
+%% Scan one queue's segments, emitting only persistent, unacked
+%% entries whose message lives in the message store (binary MsgId).
+queue_index_walker_reader(QueueName, Gatherer) ->
+    ok = scan_queue_segments(
+           fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, ok)
+                 when is_binary(MsgId) ->
+                   gatherer:sync_in(Gatherer, {MsgId, 1});
+               (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered,
+                _IsAcked, Acc) ->
+                   Acc
+           end, ok, QueueName),
+    ok = gatherer:finish(Gatherer).
+
+%% Fold Fun over every entry of every segment of one queue, after
+%% replaying the journal; Fun gets (SeqId, MsgOrId, MsgProps,
+%% IsPersistent, IsDelivered, IsAcked, Acc).
+scan_queue_segments(Fun, Acc, #resource{ virtual_host = VHost } = QueueName) ->
+    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+    scan_queue_segments(Fun, Acc, VHostDir, QueueName).
+
+scan_queue_segments(Fun, Acc, VHostDir, QueueName) ->
+    State = #qistate { segments = Segments, dir = Dir } =
+        recover_journal(blank_state(VHostDir, QueueName)),
+    Result = lists:foldr(
+      fun (Seg, AccN) ->
+              segment_entries_foldr(
+                fun (RelSeq, {{MsgOrId, MsgProps, IsPersistent},
+                              IsDelivered, IsAcked}, AccM) ->
+                        Fun(reconstruct_seq_id(Seg, RelSeq), MsgOrId, MsgProps,
+                            IsPersistent, IsDelivered, IsAcked, AccM)
+                end, AccN, segment_find_or_new(Seg, Dir, Segments))
+      end, Acc, all_segment_nums(State)),
+    {_SegmentCounts, _State} = terminate(State),
+    Result.
+
+%%----------------------------------------------------------------------------
+%% expiry/binary manipulation
+%%----------------------------------------------------------------------------
+
+%% Serialise the fixed-size part of a publish record
+%% (msg id ++ expiry ++ size) plus, when given a full #basic_message{},
+%% the embedded message body as a separate binary.
+create_pub_record_body(MsgOrId, #message_properties { expiry = Expiry,
+                                                      size   = Size }) ->
+    ExpiryBin = expiry_to_binary(Expiry),
+    case MsgOrId of
+        MsgId when is_binary(MsgId) ->
+            {<<MsgId/binary, ExpiryBin/binary, Size:?SIZE_BITS>>, <<>>};
+        #basic_message{id = MsgId} ->
+            MsgBin = term_to_binary(MsgOrId),
+            {<<MsgId/binary, ExpiryBin/binary, Size:?SIZE_BITS>>, MsgBin}
+    end.
+
+%% 'undefined' expiry is encoded as the reserved ?NO_EXPIRY value.
+expiry_to_binary(undefined) -> <<?NO_EXPIRY:?EXPIRY_BITS>>;
+expiry_to_binary(Expiry)    -> <<Expiry:?EXPIRY_BITS>>.
+
+%% Inverse of create_pub_record_body/2: an empty MsgBin means the
+%% message lives in the message store and only its id is returned.
+parse_pub_record_body(<<MsgIdNum:?MSG_ID_BITS, Expiry:?EXPIRY_BITS,
+                        Size:?SIZE_BITS>>, MsgBin) ->
+    %% work around for binary data fragmentation. See
+    %% rabbit_msg_file:read_next/2
+    <<MsgId:?MSG_ID_BYTES/binary>> = <<MsgIdNum:?MSG_ID_BITS>>,
+    Props = #message_properties{expiry = case Expiry of
+                                             ?NO_EXPIRY -> undefined;
+                                             X          -> X
+                                         end,
+                                size   = Size},
+    case MsgBin of
+        <<>> -> {MsgId, Props};
+        _    -> Msg = #basic_message{id = MsgId} = binary_to_term(MsgBin),
+                {Msg, Props}
+    end.
+
+%%----------------------------------------------------------------------------
+%% journal manipulation
+%%----------------------------------------------------------------------------
+
+%% State clause: route an absolute SeqId to its segment's in-memory
+%% journal and bump dirty_count.
+add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount,
+                                                 segments = Segments,
+                                                 dir = Dir }) ->
+    {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
+    Segment = segment_find_or_new(Seg, Dir, Segments),
+    Segment1 = add_to_journal(RelSeq, Action, Segment),
+    State #qistate { dirty_count = DCount + 1,
+                     segments = segment_store(Segment1, Segments) };
+
+%% Segment clause: apply the action to the journal-entry array,
+%% keeping entries_to_segment (pre-rendered on-disk form) in step and
+%% adjusting the unacked counter (+1 pub, 0 del, -1 ack).
+add_to_journal(RelSeq, Action,
+               Segment = #segment { journal_entries = JEntries,
+                                    entries_to_segment = EToSeg,
+                                    unacked = UnackedCount }) ->
+
+    {Fun, Entry} = action_to_entry(RelSeq, Action, JEntries),
+
+    {JEntries1, EToSeg1} =
+        case Fun of
+            set ->
+                {array:set(RelSeq, Entry, JEntries),
+                 array:set(RelSeq, entry_to_segment(RelSeq, Entry, []),
+                           EToSeg)};
+            reset ->
+                {array:reset(RelSeq, JEntries),
+                 array:reset(RelSeq, EToSeg)}
+        end,
+
+    Segment #segment {
+      journal_entries = JEntries1,
+      entries_to_segment = EToSeg1,
+      unacked = UnackedCount + case Action of
+                                   ?PUB -> +1;
+                                   del  ->  0;
+                                   ack  -> -1
+                               end}.
+
+%% Combine an action with the existing journal entry for RelSeq.
+%% A pub+del+ack triple cancels out entirely ('reset'); any other
+%% valid transition yields an updated entry ('set'). Invalid
+%% transitions (e.g. double ack) crash with case_clause by design.
+action_to_entry(RelSeq, Action, JEntries) ->
+    case array:get(RelSeq, JEntries) of
+        undefined ->
+            {set,
+             case Action of
+                 ?PUB -> {Action, no_del, no_ack};
+                 del  -> {no_pub,    del, no_ack};
+                 ack  -> {no_pub, no_del,    ack}
+             end};
+        ({Pub,    no_del, no_ack}) when Action == del ->
+            {set, {Pub,    del, no_ack}};
+        ({no_pub,    del, no_ack}) when Action == ack ->
+            {set, {no_pub, del,    ack}};
+        ({?PUB,      del, no_ack}) when Action == ack ->
+            {reset, none}
+    end.
+
+maybe_flush_journal(State) ->
+    maybe_flush_journal(infinity, State).
+
+%% Flush when dirty entries exceed max_journal_entries, or exceed the
+%% caller-provided hint (when not 'infinity').
+maybe_flush_journal(Hint, State = #qistate { dirty_count = DCount,
+                                             max_journal_entries = MaxJournal })
+  when DCount > MaxJournal orelse (Hint =/= infinity andalso DCount > Hint) ->
+    flush_journal(State);
+maybe_flush_journal(_Hint, State) ->
+    State.
+
+%% Scatter journal entries into their segment files. A segment whose
+%% unacked count reached 0 has its file deleted instead of written
+%% (all its publishes were acked). Finally truncate the journal file
+%% and notify confirms (segments are fsync'd before the truncate, see
+%% module comment).
+flush_journal(State = #qistate { segments = Segments }) ->
+    Segments1 =
+        segment_fold(
+          fun (#segment { unacked = 0, path = Path }, SegmentsN) ->
+                  case rabbit_file:is_file(Path) of
+                      true  -> ok = rabbit_file:delete(Path);
+                      false -> ok
+                  end,
+                  SegmentsN;
+              (#segment {} = Segment, SegmentsN) ->
+                  segment_store(append_journal_to_segment(Segment), SegmentsN)
+          end, segments_new(), Segments),
+    {JournalHdl, State1} =
+        get_journal_handle(State #qistate { segments = Segments1 }),
+    ok = file_handle_cache:clear(JournalHdl),
+    notify_sync(State1 #qistate { dirty_count = 0 }).
+
+%% Append this segment's buffered journal entries (the pre-serialised
+%% entries_to_segment iodata) to its segment file, then return the
+%% segment with empty journal arrays. A segment with nothing buffered is
+%% returned untouched without opening the file.
+append_journal_to_segment(#segment { journal_entries = JEntries,
+                                     entries_to_segment = EToSeg,
+                                     path = Path } = Segment) ->
+    case array:sparse_size(JEntries) of
+        0 -> Segment;
+        _ ->
+            file_handle_cache_stats:update(queue_index_write),
+
+            {ok, Hdl} = file_handle_cache:open_with_absolute_path(
+                          Path, ?WRITE_MODE,
+                          [{write_buffer, infinity}]),
+            %% the file_handle_cache also does a list reverse, so this
+            %% might not be required here, but before we were doing a
+            %% sparse_foldr, a lists:reverse/1 seems to be the correct
+            %% thing to do for now.
+            file_handle_cache:append(Hdl, lists:reverse(array:to_list(EToSeg))),
+            ok = file_handle_cache:close(Hdl),
+            Segment #segment { journal_entries = array_new(),
+                               entries_to_segment = array_new([]) }
+    end.
+
+%% Return the (cached) journal file handle, lazily opening it on first
+%% use. First use also creates the queue directory and the queue-name
+%% stub file so the directory can be mapped back to its queue.
+get_journal_handle(State = #qistate { journal_handle = undefined,
+                                      dir = Dir,
+                                      queue_name = Name }) ->
+    Path = filename:join(Dir, ?JOURNAL_FILENAME),
+    ok = rabbit_file:ensure_dir(Path),
+    ok = ensure_queue_name_stub_file(Dir, Name),
+    {ok, Hdl} = file_handle_cache:open_with_absolute_path(
+                  Path, ?WRITE_MODE, [{write_buffer, infinity}]),
+    {Hdl, State #qistate { journal_handle = Hdl }};
+get_journal_handle(State = #qistate { journal_handle = Hdl }) ->
+    {Hdl, State}.
+
+%% Loading Journal. This isn't idempotent and will mess up the counts
+%% if you call it more than once on the same state. Assumes the counts
+%% are 0 to start with.
+%%
+%% Reads the whole journal file (if present) into memory and replays
+%% each record through add_to_journal/3 via parse_journal_entries/2.
+load_journal(State = #qistate { dir = Dir }) ->
+    Path = filename:join(Dir, ?JOURNAL_FILENAME),
+    case rabbit_file:is_file(Path) of
+        true  -> {JournalHdl, State1} = get_journal_handle(State),
+                 Size = rabbit_file:file_size(Path),
+                 {ok, 0} = file_handle_cache:position(JournalHdl, 0),
+                 {ok, JournalBin} = file_handle_cache:read(JournalHdl, Size),
+                 parse_journal_entries(JournalBin, State1);
+        false -> State
+    end.
+
+%% Load the journal, then reconcile it against each segment file on
+%% disk: entries already persisted in a segment are removed from the
+%% in-memory journal (journal_minus_segment/3) and the unacked counts
+%% are corrected for the duplicates. Like load_journal/1, this is not
+%% idempotent and assumes fresh (zeroed) counts.
+recover_journal(State) ->
+    State1 = #qistate { segments = Segments } = load_journal(State),
+    Segments1 =
+        segment_map(
+          fun (Segment = #segment { journal_entries = JEntries,
+                                    entries_to_segment = EToSeg,
+                                    unacked = UnackedCountInJournal }) ->
+                  %% We want to keep ack'd entries in so that we can
+                  %% remove them if duplicates are in the journal. The
+                  %% counts here are purely from the segment itself.
+                  {SegEntries, UnackedCountInSeg} = load_segment(true, Segment),
+                  {JEntries1, EToSeg1, UnackedCountDuplicates} =
+                      journal_minus_segment(JEntries, EToSeg, SegEntries),
+                  Segment #segment { journal_entries = JEntries1,
+                                     entries_to_segment = EToSeg1,
+                                     unacked = (UnackedCountInJournal +
+                                                    UnackedCountInSeg -
+                                                    UnackedCountDuplicates) }
+          end, Segments),
+    State1 #qistate { segments = Segments1 }.
+
+%% Replay a binary journal into the in-memory journal, one record at a
+%% time. Record kinds, distinguished by the ?JPREFIX_BITS prefix:
+%% deliver, ack, and publish (persistent or transient, the latter two
+%% carrying the pub record body plus an optionally-embedded message).
+%% Parsing stops at an all-zero record (dirty-shutdown padding) or at
+%% anything unparseable / end of binary.
+parse_journal_entries(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                        Rest/binary>>, State) ->
+    parse_journal_entries(Rest, add_to_journal(SeqId, del, State));
+
+parse_journal_entries(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                        Rest/binary>>, State) ->
+    parse_journal_entries(Rest, add_to_journal(SeqId, ack, State));
+parse_journal_entries(<<0:?JPREFIX_BITS, 0:?SEQ_BITS,
+                        0:?PUB_RECORD_SIZE_BYTES/unit:8, _/binary>>, State) ->
+    %% Journal entry composed only of zeroes was probably
+    %% produced during a dirty shutdown so stop reading
+    State;
+parse_journal_entries(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                        Bin:?PUB_RECORD_BODY_BYTES/binary,
+                        MsgSize:?EMBEDDED_SIZE_BITS, MsgBin:MsgSize/binary,
+                        Rest/binary>>, State) ->
+    IsPersistent = case Prefix of
+                       ?PUB_PERSIST_JPREFIX -> true;
+                       ?PUB_TRANS_JPREFIX   -> false
+                   end,
+    parse_journal_entries(
+      Rest, add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State));
+parse_journal_entries(_ErrOrEoF, State) ->
+    State.
+
+%% Record a batch of deliveries or acks (Kind is 'del' | 'ack') for the
+%% given SeqIds: append one fixed-size record per SeqId to the journal
+%% file, mirror each into the in-memory journal, and flush if the dirty
+%% count now warrants it. An empty batch is a no-op.
+deliver_or_ack(_Kind, [], State) ->
+    State;
+deliver_or_ack(Kind, SeqIds, State) ->
+    JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end,
+    {JournalHdl, State1} = get_journal_handle(State),
+    file_handle_cache_stats:update(queue_index_journal_write),
+    ok = file_handle_cache:append(
+           JournalHdl,
+           [<<JPrefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>> || SeqId <- SeqIds]),
+    maybe_flush_journal(lists:foldl(fun (SeqId, StateN) ->
+                                            add_to_journal(SeqId, Kind, StateN)
+                                    end, State1, SeqIds)).
+
+%% Fire the registered on_sync / on_sync_msg callbacks with the pending
+%% unconfirmed sets (skipping empty sets), then clear whichever sets
+%% were delivered so confirms are not reported twice.
+notify_sync(State = #qistate{unconfirmed     = UC,
+                             unconfirmed_msg = UCM,
+                             on_sync         = OnSyncFun,
+                             on_sync_msg     = OnSyncMsgFun}) ->
+    State1 = case gb_sets:is_empty(UC) of
+                 true  -> State;
+                 false -> OnSyncFun(UC),
+                          State#qistate{unconfirmed = gb_sets:new()}
+             end,
+    case gb_sets:is_empty(UCM) of
+        true  -> State1;
+        false -> OnSyncMsgFun(UCM),
+                 State1#qistate{unconfirmed_msg = gb_sets:new()}
+    end.
+
+%%----------------------------------------------------------------------------
+%% segment manipulation
+%%----------------------------------------------------------------------------
+
+%% Split an absolute seq id into {SegmentNumber, RelativeSeqId}; the
+%% inverse of reconstruct_seq_id/2.
+seq_id_to_seg_and_rel_seq_id(SeqId) ->
+    Seg    = SeqId div ?SEGMENT_ENTRY_COUNT,
+    RelSeq = SeqId rem ?SEGMENT_ENTRY_COUNT,
+    {Seg, RelSeq}.
+
+%% Rebuild the absolute seq id from a segment number and a relative seq
+%% id; the inverse of seq_id_to_seg_and_rel_seq_id/1.
+reconstruct_seq_id(Seg, RelSeq) ->
+    RelSeq + (Seg * ?SEGMENT_ENTRY_COUNT).
+
+%% All segment numbers known for this queue, sorted and de-duplicated:
+%% the union of segments cached in memory and the numeric prefixes of
+%% the segment files found on disk.
+all_segment_nums(#qistate { dir = Dir, segments = Segments }) ->
+    OnDisk = [list_to_integer(
+                lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end,
+                                SegName))
+              || SegName <- rabbit_file:wildcard(
+                              ".*\\" ++ ?SEGMENT_EXTENSION, Dir)],
+    %% usort both sorts and removes duplicates, matching the previous
+    %% sets-based implementation.
+    lists:usort(OnDisk ++ segment_nums(Segments)).
+
+%% Look up segment Seg in the segments collection, or build a fresh,
+%% empty #segment record for it (note: the new segment is NOT stored;
+%% callers must segment_store/2 it if they want it kept).
+segment_find_or_new(Seg, Dir, Segments) ->
+    case segment_find(Seg, Segments) of
+        {ok, Segment} -> Segment;
+        error         -> SegName = integer_to_list(Seg)  ++ ?SEGMENT_EXTENSION,
+                         Path = filename:join(Dir, SegName),
+                         #segment { num                = Seg,
+                                    path               = Path,
+                                    journal_entries    = array_new(),
+                                    entries_to_segment = array_new([]),
+                                    unacked            = 0 }
+    end.
+
+%% Find a segment in the {Map, CacheList} collection. The cache list
+%% holds at most the two most-recently stored segments (see
+%% segment_store/2), so check its head, then its tail, then the map.
+segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) ->
+    {ok, Segment}; %% 1 or (2, matches head)
+segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) ->
+    {ok, Segment}; %% 2, matches tail
+segment_find(Seg, {Segments, _}) -> %% no match
+    maps:find(Seg, Segments).
+
+%% Store a segment, maintaining the invariant that the cache list holds
+%% the (at most two) most-recently stored segments, newest first, and
+%% that a segment lives either in the cache list or the map, never both.
+%% When a third segment arrives, the least recent cached one is demoted
+%% back into the map.
+segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head)
+              {Segments, [#segment { num = Seg } | Tail]}) ->
+    {Segments, [Segment | Tail]};
+segment_store(Segment = #segment { num = Seg }, %% 2, matches tail
+              {Segments, [SegmentA, #segment { num = Seg }]}) ->
+    {Segments, [Segment, SegmentA]};
+segment_store(Segment = #segment { num = Seg }, {Segments, []}) ->
+    {maps:remove(Seg, Segments), [Segment]};
+segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) ->
+    {maps:remove(Seg, Segments), [Segment, SegmentA]};
+segment_store(Segment = #segment { num = Seg },
+              {Segments, [SegmentA, SegmentB]}) ->
+    {maps:put(SegmentB#segment.num, SegmentB, maps:remove(Seg, Segments)),
+     [Segment, SegmentA]}.
+
+%% Fold Fun over every segment: the cached list first (in list order),
+%% then the map entries. Callers must not rely on the map's iteration
+%% order, which is undefined.
+segment_fold(Fun, Acc, {Segments, CachedSegments}) ->
+    maps:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end,
+              lists:foldl(Fun, Acc, CachedSegments), Segments).
+
+%% Map Fun over every segment in both the map and the cache list,
+%% preserving the collection's structure.
+segment_map(Fun, {Segments, CachedSegments}) ->
+    {maps:map(fun (_Seg, Segment) -> Fun(Segment) end, Segments),
+     lists:map(Fun, CachedSegments)}.
+
+%% Segment numbers of every segment in the collection (cached entries
+%% first, then map keys); order beyond that is not significant.
+segment_nums({Segments, CachedSegments}) ->
+    [Num || #segment { num = Num } <- CachedSegments] ++ maps:keys(Segments).
+
+%% Empty segments collection: a map of segment number -> #segment plus
+%% a most-recently-used cache list (holding at most two segments).
+segments_new() ->
+    {#{}, []}.
+
+%% Serialise one journal entry into the iodata appended to a segment
+%% file (prepended onto Initial). A fully pub+del+ack'd entry writes
+%% nothing at all; a del and/or ack each contribute a rel-seq-only
+%% record, and a publish contributes the pub record with its embedded
+%% message binary.
+entry_to_segment(_RelSeq, {?PUB, del, ack}, Initial) ->
+    Initial;
+entry_to_segment(RelSeq, {Pub, Del, Ack}, Initial) ->
+    %% NB: we are assembling the segment in reverse order here, so
+    %% del/ack comes first.
+    Buf1 = case {Del, Ack} of
+               {no_del, no_ack} ->
+                   Initial;
+               _ ->
+                   Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+                              RelSeq:?REL_SEQ_BITS>>,
+                   case {Del, Ack} of
+                       {del, ack} -> [[Binary, Binary] | Initial];
+                       _          -> [Binary | Initial]
+                   end
+           end,
+    case Pub of
+        no_pub ->
+            Buf1;
+        {IsPersistent, Bin, MsgBin} ->
+            [[<<?PUB_PREFIX:?PUB_PREFIX_BITS,
+                (bool_to_int(IsPersistent)):1,
+                RelSeq:?REL_SEQ_BITS, Bin/binary,
+                (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin] | Buf1]
+    end.
+
+%% Fold the unacked entries of segment Seg that fall within the
+%% inclusive bounds {StartSeg, StartRelSeq}..{EndSeg, EndRelSeq} onto
+%% Messages, returning {Messages1, Segments1} with the segment cached.
+%%
+%% NB: the absolute seq id must be rebuilt from Seg -- the segment
+%% actually being read -- not from StartSeg: the guard below explicitly
+%% admits entries from segments beyond StartSeg, for which
+%% reconstruct_seq_id(StartSeg, RelSeq) would be whole segments too
+%% small.
+read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq},
+                     {Messages, Segments}, Dir) ->
+    Segment = segment_find_or_new(Seg, Dir, Segments),
+    {segment_entries_foldr(
+       fun (RelSeq, {{MsgOrId, MsgProps, IsPersistent}, IsDelivered, no_ack},
+            Acc)
+             when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso
+                  (Seg < EndSeg orelse EndRelSeq >= RelSeq) ->
+               [{MsgOrId, reconstruct_seq_id(Seg, RelSeq), MsgProps,
+                 IsPersistent, IsDelivered == del} | Acc];
+           (_RelSeq, _Value, Acc) ->
+               Acc
+       end, Messages, Segment),
+     segment_store(Segment, Segments)}.
+
+%% Fold Fun (right-to-left, i.e. descending RelSeq) over the effective
+%% contents of a segment: the on-disk entries overlaid with this
+%% segment's in-memory journal entries. The raw pub record body is
+%% parsed into {MsgOrId, MsgProps} before Fun sees it.
+segment_entries_foldr(Fun, Init,
+                      Segment = #segment { journal_entries = JEntries }) ->
+    {SegEntries, _UnackedCount} = load_segment(false, Segment),
+    {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries),
+    array:sparse_foldr(
+      fun (RelSeq, {{IsPersistent, Bin, MsgBin}, Del, Ack}, Acc) ->
+              {MsgOrId, MsgProps} = parse_pub_record_body(Bin, MsgBin),
+              Fun(RelSeq, {{MsgOrId, MsgProps, IsPersistent}, Del, Ack}, Acc)
+      end, Init, SegEntries1).
+
+%% Loading segments
+%%
+%% Read a segment file from disk and parse it into a sparse entries
+%% array, returning {SegEntries, UnackedCount}. Does not do any
+%% combining with the journal at all. A missing file yields an empty
+%% array and a zero count. KeepAcked controls whether fully-acked
+%% entries are retained (see add_segment_relseq_entry/3).
+load_segment(KeepAcked, #segment { path = Path }) ->
+    Empty = {array_new(), 0},
+    case rabbit_file:is_file(Path) of
+        false -> Empty;
+        true  -> Size = rabbit_file:file_size(Path),
+                 file_handle_cache_stats:update(queue_index_read),
+                 {ok, Hdl} = file_handle_cache:open_with_absolute_path(
+                               Path, ?READ_MODE, []),
+                 {ok, 0} = file_handle_cache:position(Hdl, bof),
+                 {ok, SegBin} = file_handle_cache:read(Hdl, Size),
+                 ok = file_handle_cache:close(Hdl),
+                 parse_segment_entries(SegBin, KeepAcked, Empty)
+    end.
+
+%% Walk a segment binary record by record: publish records are handled
+%% by parse_segment_publish_entry/5, rel-seq-only records (a deliver or
+%% an ack) by add_segment_relseq_entry/3, until the binary is exhausted.
+parse_segment_entries(<<?PUB_PREFIX:?PUB_PREFIX_BITS,
+                        IsPersistNum:1, RelSeq:?REL_SEQ_BITS, Rest/binary>>,
+                      KeepAcked, Acc) ->
+    parse_segment_publish_entry(
+      Rest, 1 == IsPersistNum, RelSeq, KeepAcked, Acc);
+parse_segment_entries(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+                        RelSeq:?REL_SEQ_BITS, Rest/binary>>, KeepAcked, Acc) ->
+    parse_segment_entries(
+      Rest, KeepAcked, add_segment_relseq_entry(KeepAcked, RelSeq, Acc));
+parse_segment_entries(<<>>, _KeepAcked, Acc) ->
+    Acc.
+
+%% Parse the body of a publish record (pub record body plus embedded
+%% message binary), store it as a fresh {Pub, no_del, no_ack} entry and
+%% bump the unacked count. A body that does not match (truncated tail)
+%% is skipped and parsing resumes on whatever remains.
+parse_segment_publish_entry(<<Bin:?PUB_RECORD_BODY_BYTES/binary,
+                              MsgSize:?EMBEDDED_SIZE_BITS,
+                              MsgBin:MsgSize/binary, Rest/binary>>,
+                            IsPersistent, RelSeq, KeepAcked,
+                            {SegEntries, Unacked}) ->
+    Obj = {{IsPersistent, Bin, MsgBin}, no_del, no_ack},
+    SegEntries1 = array:set(RelSeq, Obj, SegEntries),
+    parse_segment_entries(Rest, KeepAcked, {SegEntries1, Unacked + 1});
+parse_segment_publish_entry(Rest, _IsPersistent, _RelSeq, KeepAcked, Acc) ->
+    parse_segment_entries(Rest, KeepAcked, Acc).
+
+%% Apply a rel-seq-only record to the existing entry for RelSeq: the
+%% first occurrence marks the delivery, the second the ack. On ack the
+%% entry is either kept (marked acked) or dropped from the array,
+%% depending on KeepAcked; either way the unacked count goes down.
+add_segment_relseq_entry(KeepAcked, RelSeq, {SegEntries, Unacked}) ->
+    case array:get(RelSeq, SegEntries) of
+        {Pub, no_del, no_ack} ->
+            {array:set(RelSeq, {Pub, del, no_ack}, SegEntries), Unacked};
+        {Pub, del, no_ack} when KeepAcked ->
+            {array:set(RelSeq, {Pub, del, ack},    SegEntries), Unacked - 1};
+        {_Pub, del, no_ack} ->
+            {array:reset(RelSeq,                   SegEntries), Unacked - 1}
+    end.
+
+%% Fresh per-segment entries array with 'undefined' as the default
+%% slot value.
+array_new() ->
+    array_new(undefined).
+
+%% Fresh fixed-size array with ?SEGMENT_ENTRY_COUNT slots (one per
+%% possible relative seq id) and the given default value.
+array_new(Default) ->
+    array:new([{default, Default}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]).
+
+%% Encode a boolean as the 1-bit integer used in segment pub records.
+bool_to_int(true)  -> 1;
+bool_to_int(false) -> 0.
+
+%%----------------------------------------------------------------------------
+%% journal & segment combination
+%%----------------------------------------------------------------------------
+
+%% Combine what we have just read from a segment file with what we're
+%% holding for that segment in memory. There must be no duplicates.
+%% Returns {SegEntries1, AdditionalUnacked}: the merged entries array
+%% and the net change the journal makes to the unacked count.
+segment_plus_journal(SegEntries, JEntries) ->
+    array:sparse_foldl(
+      fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) ->
+              SegEntry = array:get(RelSeq, SegEntriesOut),
+              {Obj, AdditionalUnackedDelta} =
+                  segment_plus_journal1(SegEntry, JObj),
+              {case Obj of
+                   undefined -> array:reset(RelSeq, SegEntriesOut);
+                   _         -> array:set(RelSeq, Obj, SegEntriesOut)
+               end,
+               AdditionalUnacked + AdditionalUnackedDelta}
+      end, {SegEntries, 0}, JEntries).
+
+%% Here, the result is a tuple with the first element containing the
+%% item which we may be adding to (for items only in the journal),
+%% modifying in (bits in both), or, when returning 'undefined',
+%% erasing from (ack in journal, not segment) the segment array. The
+%% other element of the tuple is the delta for AdditionalUnacked.
+%% One (SegmentEntry, JournalEntry) pair at a time; only combinations
+%% that can legally co-occur have clauses -- anything else crashes with
+%% function_clause, signalling index corruption.
+segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) ->
+    {Obj, 1};
+segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) ->
+    {Obj, 1};
+segment_plus_journal1(undefined, {?PUB, del, ack}) ->
+    {undefined, 0};
+
+segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) ->
+    {{Pub, del, no_ack}, 0};
+segment_plus_journal1({?PUB, no_del, no_ack},       {no_pub, del, ack}) ->
+    {undefined, -1};
+segment_plus_journal1({?PUB, del, no_ack},          {no_pub, no_del, ack}) ->
+    {undefined, -1}.
+
+%% Remove from the journal entries for a segment, items that are
+%% duplicates of entries found in the segment itself. Used on start up
+%% to clean up the journal.
+%%
+%% We need to update the entries_to_segment since they are just a
+%% cache of what's on the journal.
+%%
+%% Returns {JEntries1, EToSeg1, UnackedRemoved}, the cleaned journal
+%% arrays plus how many unacked messages were double-counted.
+journal_minus_segment(JEntries, EToSeg, SegEntries) ->
+    array:sparse_foldl(
+      fun (RelSeq, JObj, {JEntriesOut, EToSegOut, UnackedRemoved}) ->
+              SegEntry = array:get(RelSeq, SegEntries),
+              {Obj, UnackedRemovedDelta} =
+                  journal_minus_segment1(JObj, SegEntry),
+              {JEntriesOut1, EToSegOut1} =
+                  case Obj of
+                      keep      ->
+                          {JEntriesOut, EToSegOut};
+                      undefined ->
+                          {array:reset(RelSeq, JEntriesOut),
+                           array:reset(RelSeq, EToSegOut)};
+                      _         ->
+                          {array:set(RelSeq, Obj, JEntriesOut),
+                           array:set(RelSeq, entry_to_segment(RelSeq, Obj, []),
+                                     EToSegOut)}
+                  end,
+               {JEntriesOut1, EToSegOut1, UnackedRemoved + UnackedRemovedDelta}
+      end, {JEntries, EToSeg, 0}, JEntries).
+
+%% Here, the result is a tuple with the first element containing the
+%% item we are adding to or modifying in the (initially fresh) journal
+%% array. If the item is 'undefined' we leave the journal array
+%% alone. The other element of the tuple is the deltas for
+%% UnackedRemoved.
+
+%% One (JournalEntry, SegmentEntry) pair at a time. The clause table is
+%% exhaustive over the combinations that can legally co-occur; anything
+%% else crashes with function_clause, signalling index corruption.
+
+%% Both the same. Must be at least the publish
+journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) ->
+    {undefined, 1};
+journal_minus_segment1({?PUB, _Del, ack} = Obj,    Obj) ->
+    {undefined, 0};
+
+%% Just publish in journal
+journal_minus_segment1({?PUB, no_del, no_ack},     undefined) ->
+    {keep, 0};
+
+%% Publish and deliver in journal
+journal_minus_segment1({?PUB, del, no_ack},        undefined) ->
+    {keep, 0};
+journal_minus_segment1({?PUB = Pub, del, no_ack},  {Pub, no_del, no_ack}) ->
+    {{no_pub, del, no_ack}, 1};
+
+%% Publish, deliver and ack in journal
+journal_minus_segment1({?PUB, del, ack},           undefined) ->
+    {keep, 0};
+journal_minus_segment1({?PUB = Pub, del, ack},     {Pub, no_del, no_ack}) ->
+    {{no_pub, del, ack}, 1};
+journal_minus_segment1({?PUB = Pub, del, ack},     {Pub, del, no_ack}) ->
+    {{no_pub, no_del, ack}, 1};
+
+%% Just deliver in journal
+journal_minus_segment1({no_pub, del, no_ack},      {?PUB, no_del, no_ack}) ->
+    {keep, 0};
+journal_minus_segment1({no_pub, del, no_ack},      {?PUB, del, no_ack}) ->
+    {undefined, 0};
+
+%% Just ack in journal
+journal_minus_segment1({no_pub, no_del, ack},      {?PUB, del, no_ack}) ->
+    {keep, 0};
+journal_minus_segment1({no_pub, no_del, ack},      {?PUB, del, ack}) ->
+    {undefined, -1};
+
+%% Deliver and ack in journal
+journal_minus_segment1({no_pub, del, ack},         {?PUB, no_del, no_ack}) ->
+    {keep, 0};
+journal_minus_segment1({no_pub, del, ack},         {?PUB, del, no_ack}) ->
+    {{no_pub, no_del, ack}, 0};
+journal_minus_segment1({no_pub, del, ack},         {?PUB, del, ack}) ->
+    {undefined, -1};
+
+%% Missing segment. If flush_journal/1 is interrupted after deleting
+%% the segment but before truncating the journal we can get these
+%% cases: a delivery and an acknowledgement in the journal, or just an
+%% acknowledgement in the journal, but with no segment. In both cases
+%% we have really forgotten the message; so ignore what's in the
+%% journal.
+journal_minus_segment1({no_pub, no_del, ack},      undefined) ->
+    {undefined, 0};
+journal_minus_segment1({no_pub, del, ack},         undefined) ->
+    {undefined, 0}.
+
+%%----------------------------------------------------------------------------
+%% upgrade
+%%----------------------------------------------------------------------------
+
+-spec add_queue_ttl() -> 'ok'.
+
+%% Upgrade step: rewrite every queue's journal and segment files,
+%% appending an (undefined) expiry field to each publish record.
+add_queue_ttl() ->
+    foreach_queue_index({fun add_queue_ttl_journal/1,
+                         fun add_queue_ttl_segment/1}).
+
+%% Per-record journal transform for add_queue_ttl/0: del/ack records
+%% pass through unchanged; publish records gain expiry_to_binary(
+%% undefined); anything else (end of data) stops the transform.
+add_queue_ttl_journal(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                        Rest/binary>>) ->
+    {<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
+add_queue_ttl_journal(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                        Rest/binary>>) ->
+    {<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
+add_queue_ttl_journal(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                        MsgId:?MSG_ID_BYTES/binary, Rest/binary>>) ->
+    {[<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, MsgId,
+      expiry_to_binary(undefined)], Rest};
+add_queue_ttl_journal(_) ->
+    stop.
+
+%% Per-record segment transform for add_queue_ttl/0: publish records
+%% gain an (undefined) expiry; rel-seq-only records pass through;
+%% anything else stops the transform.
+add_queue_ttl_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1,
+                        RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BYTES/binary,
+                        Rest/binary>>) ->
+    {[<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>,
+      MsgId, expiry_to_binary(undefined)], Rest};
+add_queue_ttl_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+                        RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
+    {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
+     Rest};
+add_queue_ttl_segment(_) ->
+    stop.
+
+%% Upgrade step: rewrite segment files (journals untouched, hence
+%% 'none') normalising records whose prefix bits are zero -- see
+%% avoid_zeroes_segment/1.
+avoid_zeroes() ->
+    foreach_queue_index({none, fun avoid_zeroes_segment/1}).
+
+%% Per-record segment transform for avoid_zeroes/0: publish records
+%% pass through unchanged; a rel-seq record whose prefix reads as zero
+%% is rewritten with the proper ?REL_SEQ_ONLY_PREFIX; anything else
+%% stops the transform.
+avoid_zeroes_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS,  IsPersistentNum:1,
+                       RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BITS,
+                       Expiry:?EXPIRY_BITS, Rest/binary>>) ->
+    {<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS,
+       MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS>>, Rest};
+avoid_zeroes_segment(<<0:?REL_SEQ_ONLY_PREFIX_BITS,
+                       RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
+    {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
+     Rest};
+avoid_zeroes_segment(_) ->
+    stop.
+
+%% At upgrade time we just define every message's size as 0 - that
+%% will save us a load of faff with the message store, and means we
+%% can actually use the clean recovery terms in VQ. It does mean we
+%% don't count message bodies from before the migration, but we can
+%% live with that.
+%%
+%% Upgrade step: append a zero size field to every publish record in
+%% both journal and segment files.
+store_msg_size() ->
+    foreach_queue_index({fun store_msg_size_journal/1,
+                         fun store_msg_size_segment/1}).
+
+%% Per-record journal transform for store_msg_size/0: del/ack records
+%% pass through; publish records gain a zero ?SIZE_BITS field; anything
+%% else stops the transform.
+store_msg_size_journal(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                         Rest/binary>>) ->
+    {<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
+store_msg_size_journal(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                         Rest/binary>>) ->
+    {<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
+store_msg_size_journal(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                         MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS,
+                         Rest/binary>>) ->
+    {<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS, MsgId:?MSG_ID_BITS,
+       Expiry:?EXPIRY_BITS, 0:?SIZE_BITS>>, Rest};
+store_msg_size_journal(_) ->
+    stop.
+
+%% Per-record segment transform for store_msg_size/0: publish records
+%% gain a zero ?SIZE_BITS field; rel-seq-only records pass through;
+%% anything else stops the transform.
+store_msg_size_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1,
+                         RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BITS,
+                         Expiry:?EXPIRY_BITS, Rest/binary>>) ->
+    {<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS,
+       MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS, 0:?SIZE_BITS>>, Rest};
+store_msg_size_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+                         RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
+    {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
+     Rest};
+store_msg_size_segment(_) ->
+    stop.
+
+%% Upgrade step: append a zero embedded-message-size field to every
+%% publish record in both journal and segment files.
+store_msg() ->
+    foreach_queue_index({fun store_msg_journal/1,
+                         fun store_msg_segment/1}).
+
+%% Per-record journal transform for store_msg/0: del/ack records pass
+%% through; publish records gain a zero ?EMBEDDED_SIZE_BITS field;
+%% anything else stops the transform.
+store_msg_journal(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                    Rest/binary>>) ->
+    {<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
+store_msg_journal(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                    Rest/binary>>) ->
+    {<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
+store_msg_journal(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
+                    MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS, Size:?SIZE_BITS,
+                    Rest/binary>>) ->
+    {<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS, MsgId:?MSG_ID_BITS,
+       Expiry:?EXPIRY_BITS, Size:?SIZE_BITS,
+       0:?EMBEDDED_SIZE_BITS>>, Rest};
+store_msg_journal(_) ->
+    stop.
+
+%% Per-record segment transform for store_msg/0: publish records gain a
+%% zero ?EMBEDDED_SIZE_BITS field; rel-seq-only records pass through;
+%% anything else stops the transform.
+store_msg_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1,
+                    RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BITS,
+                    Expiry:?EXPIRY_BITS, Size:?SIZE_BITS, Rest/binary>>) ->
+    {<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS,
+       MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS, Size:?SIZE_BITS,
+       0:?EMBEDDED_SIZE_BITS>>, Rest};
+store_msg_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+                    RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
+    {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
+     Rest};
+store_msg_segment(_) ->
+    stop.
+
+
+
+%%----------------------------------------------------------------------------
+%% Migration functions
+%%----------------------------------------------------------------------------
+
+%% Apply a {JournalFun, SegmentFun} transform pair to every queue index
+%% directory, one worker-pool job per queue, and wait for them all.
+%% NOTE(review): gatherer:out/1 is used purely as a completion barrier
+%% here -- it is assumed to block until every forked worker has called
+%% finish/1 and then return 'empty'; confirm against gatherer.erl.
+foreach_queue_index(Funs) ->
+    QueueDirNames = all_queue_directory_names(),
+    {ok, Gatherer} = gatherer:start_link(),
+    [begin
+         ok = gatherer:fork(Gatherer),
+         ok = worker_pool:submit_async(
+                fun () ->
+                        transform_queue(QueueDirName, Gatherer, Funs)
+                end)
+     end || QueueDirName <- QueueDirNames],
+    empty = gatherer:out(Gatherer),
+    ok = gatherer:stop(Gatherer).
+
+%% Transform one queue directory: its journal file with JournalFun and
+%% each of its segment files with SegmentFun, then report completion to
+%% the gatherer.
+transform_queue(Dir, Gatherer, {JournalFun, SegmentFun}) ->
+    ok = transform_file(filename:join(Dir, ?JOURNAL_FILENAME), JournalFun),
+    [ok = transform_file(filename:join(Dir, Seg), SegmentFun)
+     || Seg <- rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir)],
+    ok = gatherer:finish(Gatherer).
+
+%% Rewrite Path by feeding its entire contents through Fun (see
+%% drive_transform_fun/3) into a ".upgrade" scratch file, then rename
+%% the scratch file over the original. 'none' or an empty file is a
+%% no-op.
+transform_file(_Path, none) ->
+    ok;
+transform_file(Path, Fun) when is_function(Fun)->
+    PathTmp = Path ++ ".upgrade",
+    case rabbit_file:file_size(Path) of
+        0    -> ok;
+        Size -> {ok, PathTmpHdl} =
+                    file_handle_cache:open_with_absolute_path(
+                      PathTmp, ?WRITE_MODE,
+                      [{write_buffer, infinity}]),
+
+                {ok, PathHdl} = file_handle_cache:open_with_absolute_path(
+                                  Path, ?READ_MODE, [{read_buffer, Size}]),
+                {ok, Content} = file_handle_cache:read(PathHdl, Size),
+                ok = file_handle_cache:close(PathHdl),
+
+                ok = drive_transform_fun(Fun, PathTmpHdl, Content),
+
+                ok = file_handle_cache:close(PathTmpHdl),
+                ok = rabbit_file:rename(PathTmp, Path)
+    end.
+
+%% Repeatedly apply Fun to the remaining input: each call either yields
+%% {Output, Rest} (Output is appended to Hdl, Rest is recursed on) or
+%% 'stop' to finish.
+drive_transform_fun(Fun, Hdl, Contents) ->
+    case Fun(Contents) of
+        stop                -> ok;
+        {Output, Contents1} -> ok = file_handle_cache:append(Hdl, Output),
+                               drive_transform_fun(Fun, Hdl, Contents1)
+    end.
+
+%% Migration: move a queue's index directory from the legacy global
+%% "queues" location into its per-vhost message store directory, and
+%% (re)write the queue-name stub file there. A missing source directory
+%% is only logged -- the migration still returns ok.
+move_to_per_vhost_stores(#resource{virtual_host = VHost} = QueueName) ->
+    OldQueueDir = filename:join([queues_base_dir(), "queues",
+                                 queue_name_to_dir_name_legacy(QueueName)]),
+    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+    NewQueueDir = queue_dir(VHostDir, QueueName),
+    rabbit_log_upgrade:info("About to migrate queue directory '~s' to '~s'",
+                            [OldQueueDir, NewQueueDir]),
+    case rabbit_file:is_dir(OldQueueDir) of
+        true  ->
+            ok = rabbit_file:ensure_dir(NewQueueDir),
+            ok = rabbit_file:rename(OldQueueDir, NewQueueDir),
+            ok = ensure_queue_name_stub_file(NewQueueDir, QueueName);
+        false ->
+            Msg  = "Queue index directory '~s' not found for ~s~n",
+            Args = [OldQueueDir, rabbit_misc:rs(QueueName)],
+            rabbit_log_upgrade:error(Msg, Args),
+            rabbit_log:error(Msg, Args)
+    end,
+    ok.
+
+%% Write the stub file that maps a queue directory back to its vhost
+%% and queue name. Returns the file:write_file/2 result (ok or
+%% {error, _}); callers match on ok.
+ensure_queue_name_stub_file(Dir, #resource{virtual_host = VHost, name = QName}) ->
+    QueueNameFile = filename:join(Dir, ?QUEUE_NAME_STUB_FILE),
+    file:write_file(QueueNameFile, <<"VHOST: ", VHost/binary, "\n",
+                                     "QUEUE: ", QName/binary, "\n">>).
+
+%% Read each durable queue's recovery terms from the legacy global
+%% recovery table, substituting 'non_clean_shutdown' for queues with no
+%% stored terms. The backing queue interface requires that the terms
+%% come back in the same order as DurableQueueNames, which the
+%% comprehension preserves directly.
+read_global_recovery_terms(DurableQueueNames) ->
+    ok = rabbit_recovery_terms:open_global_table(),
+
+    OrderedTerms =
+        [case rabbit_recovery_terms:read_global(
+                queue_name_to_dir_name_legacy(QName)) of
+             {ok, Terms} -> Terms;
+             {error, _}  -> non_clean_shutdown
+         end || QName <- DurableQueueNames],
+
+    ok = rabbit_recovery_terms:close_global_table(),
+    {OrderedTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}.
+
+%% Remove the legacy global queues directory and recovery-terms table
+%% after migration. Both results are deliberately ignored (best-effort
+%% cleanup); the function always returns ok.
+cleanup_global_recovery_terms() ->
+    rabbit_file:recursive_delete([filename:join([queues_base_dir(), "queues"])]),
+    rabbit_recovery_terms:delete_global_table(),
+    ok.
+
+
+%% Persist a queue's recovery term in its vhost's recovery-terms store,
+%% keyed by the queue's directory name.
+update_recovery_term(#resource{virtual_host = VHost} = QueueName, Term) ->
+    Key = queue_name_to_dir_name(QueueName),
+    rabbit_recovery_terms:store(VHost, Key, Term).
diff --git a/deps/rabbit/src/rabbit_queue_location_client_local.erl b/deps/rabbit/src/rabbit_queue_location_client_local.erl
new file mode 100644
index 0000000000..2df1608534
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_location_client_local.erl
@@ -0,0 +1,39 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_location_client_local).
+-behaviour(rabbit_queue_master_locator).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([description/0, queue_master_location/1]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "locate queue master client local"},
+ {mfa, {rabbit_registry, register,
+ [queue_master_locator,
+ <<"client-local">>, ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+
+%%---------------------------------------------------------------------------
+%% Queue Master Location Callbacks
+%%---------------------------------------------------------------------------
+
+%% rabbit_queue_master_locator callback: human-readable strategy
+%% description.
+description() ->
+    [{description, <<"Locate queue master node as the client local node">>}].
+
+%% rabbit_queue_master_locator callback: always pick the node handling
+%% the declaring client's connection, i.e. this node.
+queue_master_location(Q) when ?is_amqqueue(Q) ->
+    %% unlike with other locator strategies we do not check node maintenance
+    %% status for two reasons:
+    %%
+    %% * nodes in maintenance mode will drop their client connections
+    %% * with other strategies, if no nodes are available, the current node
+    %%   is returned but this strategy already does just that
+    {ok, node()}.
diff --git a/deps/rabbit/src/rabbit_queue_location_min_masters.erl b/deps/rabbit/src/rabbit_queue_location_min_masters.erl
new file mode 100644
index 0000000000..6535f082fe
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_location_min_masters.erl
@@ -0,0 +1,70 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_location_min_masters).
+-behaviour(rabbit_queue_master_locator).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([description/0, queue_master_location/1]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "locate queue master min bound queues"},
+ {mfa, {rabbit_registry, register,
+ [queue_master_locator,
+ <<"min-masters">>, ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+%%---------------------------------------------------------------------------
+%% Queue Master Location Callbacks
+%%---------------------------------------------------------------------------
+
+%% rabbit_queue_master_locator callback: human-readable strategy
+%% description.
+description() ->
+    [{description,
+      <<"Locate queue master node from cluster node with least bound queues">>}].
+
+%% rabbit_queue_master_locator callback: pick the candidate node that
+%% currently hosts the fewest queue masters, skipping nodes that are
+%% being drained for maintenance. Returns {ok, Node}, or 'undefined'
+%% when every candidate is being drained.
+queue_master_location(Q) when ?is_amqqueue(Q) ->
+    Cluster    = rabbit_queue_master_location_misc:all_nodes(Q),
+    QueueNames = rabbit_amqqueue:list_names(),
+    %% Tally the number of masters hosted by each candidate node;
+    %% masters on nodes outside the candidate set are ignored.
+    Tally0 = maps:from_list([{Node, 0} || Node <- Cluster]),
+    Tally  =
+        lists:foldl(
+          fun(#resource{virtual_host = VHost, name = QueueName}, Acc) ->
+                  case rabbit_queue_master_location_misc:lookup_master(QueueName, VHost) of
+                      {ok, Master} when is_atom(Master) ->
+                          case Acc of
+                              #{Master := N} -> Acc#{Master := N + 1};
+                              _              -> Acc
+                          end;
+                      _ ->
+                          Acc
+                  end
+          end,
+          Tally0,
+          QueueNames),
+
+    %% Nodes being drained must not receive new masters.
+    Eligible = maps:filter(
+                 fun (Node, _N) ->
+                         not rabbit_maintenance:is_being_drained_local_read(Node)
+                 end, Tally),
+
+    case map_size(Eligible) of
+        0 ->
+            undefined;
+        _ ->
+            {MinNode, _NMasters} =
+                maps:fold(
+                  fun(Node, N, init) ->
+                          {Node, N};
+                     (Node, N, {_Best, Min}) when N < Min ->
+                          {Node, N};
+                     (_Node, _N, Best) ->
+                          Best
+                  end, init, Eligible),
+            {ok, MinNode}
+    end.
diff --git a/deps/rabbit/src/rabbit_queue_location_random.erl b/deps/rabbit/src/rabbit_queue_location_random.erl
new file mode 100644
index 0000000000..7232fc6703
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_location_random.erl
@@ -0,0 +1,42 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_location_random).
+-behaviour(rabbit_queue_master_locator).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([description/0, queue_master_location/1]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "locate queue master random"},
+ {mfa, {rabbit_registry, register,
+ [queue_master_locator,
+ <<"random">>, ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, kernel_ready}]}).
+
+%%---------------------------------------------------------------------------
+%% Queue Master Location Callbacks
+%%---------------------------------------------------------------------------
+
+%% rabbit_queue_master_locator callback: human-readable strategy
+%% description.
+description() ->
+    [{description,
+      <<"Locate queue master node from cluster in a random manner">>}].
+
+%% rabbit_queue_master_locator callback: pick a pseudo-random node from
+%% the candidates that are not being drained for maintenance. Returns
+%% {ok, Node}, or 'undefined' when no candidate remains.
+queue_master_location(Q) when ?is_amqqueue(Q) ->
+    AllNodes = rabbit_queue_master_location_misc:all_nodes(Q),
+    case rabbit_maintenance:filter_out_drained_nodes_local_read(AllNodes) of
+        [] ->
+            undefined;
+        [_ | _] = Candidates ->
+            %% Pseudo-random pick seeded from the monotonic clock.
+            Index = erlang:phash2(erlang:monotonic_time(), length(Candidates)),
+            {ok, lists:nth(Index + 1, Candidates)}
+    end.
diff --git a/deps/rabbit/src/rabbit_queue_location_validator.erl b/deps/rabbit/src/rabbit_queue_location_validator.erl
new file mode 100644
index 0000000000..bf41be622c
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_location_validator.erl
@@ -0,0 +1,67 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_location_validator).
+-behaviour(rabbit_policy_validator).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([validate_policy/1, validate_strategy/1]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "Queue location policy validation"},
+ {mfa, {rabbit_registry, register,
+ [policy_validator,
+ <<"queue-master-locator">>,
+ ?MODULE]}},
+ {requires, rabbit_registry},
+ {enables, recovery}]}).
+
+%% rabbit_policy_validator callback: check that the policy definition's
+%% queue-master-locator value names a resolvable locator strategy.
+%% Returns ok, or an error when the key is missing or the strategy does
+%% not resolve to a loadable module.
+validate_policy(KeyList) ->
+    case proplists:lookup(<<"queue-master-locator">> , KeyList) of
+        {_, Strategy} -> case validate_strategy(Strategy) of
+                             {error, _, _} = Er -> Er;
+                             _                  -> ok
+                         end;
+        _             -> {error, "queue-master-locator undefined"}
+    end.
+
+%% Resolve a strategy name to its locator module via module/1. Returns
+%% {ok, Module}, or {error, Format, Args} when it cannot be resolved.
+validate_strategy(Strategy) ->
+    case module(Strategy) of
+        R = {ok, _M} -> R;
+        _            ->
+            {error, "~p invalid queue-master-locator value", [Strategy]}
+    end.
+
+%% Fetch the named policy value for Q, mapping 'undefined' to 'none'.
+policy(Name, Q) ->
+    case rabbit_policy:get(Name, Q) of
+        undefined -> none;
+        Value     -> Value
+    end.
+
+%% Resolve a locator strategy to a loaded module: from a queue (via its
+%% queue-master-locator policy), from a binary strategy name (via the
+%% registry, verifying the module's code is actually present), or from
+%% any other term by coercing it to a binary first. Returns
+%% {ok, Module} or 'no_location_strategy'.
+module(Q) when ?is_amqqueue(Q) ->
+    case policy(<<"queue-master-locator">>, Q) of
+        undefined -> no_location_strategy;
+        Mode      -> module(Mode)
+    end;
+module(Strategy) when is_binary(Strategy) ->
+    case rabbit_registry:binary_to_type(Strategy) of
+        {error, not_found} -> no_location_strategy;
+        T ->
+            case rabbit_registry:lookup_module(queue_master_locator, T) of
+                {ok, Module} ->
+                    case code:which(Module) of
+                        non_existing -> no_location_strategy;
+                        _            -> {ok, Module}
+                    end;
+                _ ->
+                    no_location_strategy
+            end
+    end;
+module(Strategy) ->
+    module(rabbit_data_coercion:to_binary(Strategy)).
diff --git a/deps/rabbit/src/rabbit_queue_master_location_misc.erl b/deps/rabbit/src/rabbit_queue_master_location_misc.erl
new file mode 100644
index 0000000000..37698e184f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_master_location_misc.erl
@@ -0,0 +1,108 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_master_location_misc).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([lookup_master/2,
+ lookup_queue/2,
+ get_location/1,
+ get_location_mod_by_config/1,
+ get_location_mod_by_args/1,
+ get_location_mod_by_policy/1,
+ all_nodes/1]).
+
+%% Resolve the node currently hosting the master (leader) process of the
+%% queue named QueueNameBin in vhost VHostPath.
+%% NOTE(review): when the queue exists but its pid fails the
+%% ?amqqueue_has_valid_pid guard, the {ok, Queue} tuple falls through the
+%% catch-all clause and is returned as-is, which does not match the
+%% declared spec — confirm callers tolerate this.
+-spec lookup_master(binary(), binary()) -> {ok, node()} | {error, not_found}.
+lookup_master(QueueNameBin, VHostPath) when is_binary(QueueNameBin),
+                                            is_binary(VHostPath) ->
+    QueueR = rabbit_misc:r(VHostPath, queue, QueueNameBin),
+    case rabbit_amqqueue:lookup(QueueR) of
+        {ok, Queue} when ?amqqueue_has_valid_pid(Queue) ->
+            Pid = amqqueue:get_pid(Queue),
+            {ok, node(Pid)};
+        Error -> Error
+    end.
+
+%% Fetch the full queue record for the named queue in the given vhost,
+%% passing any lookup error straight through.
+lookup_queue(QueueNameBin, VHostPath) when is_binary(QueueNameBin),
+                                           is_binary(VHostPath) ->
+    QueueR = rabbit_misc:r(VHostPath, queue, QueueNameBin),
+    Result = rabbit_amqqueue:lookup(QueueR),
+    case Result of
+        {ok, Queue} when ?is_amqqueue(Queue) -> Result;
+        Error -> Error
+    end.
+
+%% Determine the master node for Queue by trying, in order of
+%% precedence: queue arguments, then policy, then server configuration.
+%% The first lookup that yields a strategy module wins; that module's
+%% queue_master_location/1 callback then picks the node.
+get_location(Queue) when ?is_amqqueue(Queue) ->
+    Lookups = [fun get_location_mod_by_args/1,
+               fun get_location_mod_by_policy/1,
+               fun get_location_mod_by_config/1],
+    case first_strategy_module(Lookups, Queue) of
+        {ok, CB} -> CB:queue_master_location(Queue);
+        Error -> Error
+    end.
+
+%% Try each lookup fun in turn, returning the first {ok, Module}; if
+%% none succeeds, return the last lookup's error. Matching "anything
+%% that is not {ok, _}" as a failure also covers the 3-tuple
+%% {error, Fmt, Args} results from validate_strategy/1, which the old
+%% nested-case version crashed on with a case_clause.
+first_strategy_module([Lookup], Queue) ->
+    Lookup(Queue);
+first_strategy_module([Lookup | Rest], Queue) ->
+    case Lookup(Queue) of
+        {ok, _Module} = Ok -> Ok;
+        _Error -> first_strategy_module(Rest, Queue)
+    end.
+
+%% Strategy module from the queue's x-queue-master-locator argument.
+%% Returns {ok, Module}, or an error tuple when the argument is absent
+%% or names an unknown strategy.
+get_location_mod_by_args(Queue) when ?is_amqqueue(Queue) ->
+    Args = amqqueue:get_arguments(Queue),
+    case rabbit_misc:table_lookup(Args, <<"x-queue-master-locator">>) of
+        {_Type, Strategy} ->
+            %% validate_strategy/1 already returns {ok, Mod} or an error
+            %% tuple, so the former identity re-wrapping case was
+            %% redundant and has been removed.
+            rabbit_queue_location_validator:validate_strategy(Strategy);
+        _ -> {error, "x-queue-master-locator undefined"}
+    end.
+
+%% Strategy module from the effective queue-master-locator policy.
+%% Returns {ok, Module}, or an error tuple when no policy applies or the
+%% policy names an unknown strategy.
+get_location_mod_by_policy(Queue) when ?is_amqqueue(Queue) ->
+    case rabbit_policy:get(<<"queue-master-locator">>, Queue) of
+        undefined -> {error, "queue-master-locator policy undefined"};
+        Strategy ->
+            %% validate_strategy/1 already returns {ok, Mod} or an error
+            %% tuple; no re-wrapping case needed.
+            rabbit_queue_location_validator:validate_strategy(Strategy)
+    end.
+
+%% Strategy module from the node-wide queue_master_locator application
+%% setting. Returns {ok, Module}, or an error tuple when the setting is
+%% absent or names an unknown strategy.
+get_location_mod_by_config(Queue) when ?is_amqqueue(Queue) ->
+    case application:get_env(rabbit, queue_master_locator) of
+        {ok, Strategy} ->
+            %% validate_strategy/1 already returns {ok, Mod} or an error
+            %% tuple; no re-wrapping case needed.
+            rabbit_queue_location_validator:validate_strategy(Strategy);
+        _ -> {error, "queue_master_locator undefined"}
+    end.
+
+%% Nodes eligible to host Queue's master, honouring an ha-mode 'nodes'
+%% policy when the queue is mirrored with an explicit node list.
+all_nodes(Queue) when ?is_amqqueue(Queue) ->
+    handle_is_mirrored_ha_nodes(rabbit_mirror_queue_misc:is_mirrored_ha_nodes(Queue), Queue).
+
+handle_is_mirrored_ha_nodes(false, _Queue) ->
+    % Note: ha-mode is NOT 'nodes' - it is either exactly or all, which means
+    % that any node in the cluster is eligible to be the new queue master node
+    rabbit_nodes:all_running();
+handle_is_mirrored_ha_nodes(true, Queue) ->
+    % Note: ha-mode is 'nodes', which explicitly specifies allowed nodes.
+    % We must use suggested_queue_nodes to get that list of nodes as the
+    % starting point for finding the queue master location
+    handle_suggested_queue_nodes(rabbit_mirror_queue_misc:suggested_queue_nodes(Queue)).
+
+% An empty suggested-slave list gives no usable restriction: fall back to
+% every running node. Otherwise the master plus suggested slaves form the
+% candidate set.
+handle_suggested_queue_nodes({_MNode, []}) ->
+    rabbit_nodes:all_running();
+handle_suggested_queue_nodes({MNode, SNodes}) ->
+    [MNode | SNodes].
diff --git a/deps/rabbit/src/rabbit_queue_master_locator.erl b/deps/rabbit/src/rabbit_queue_master_locator.erl
new file mode 100644
index 0000000000..ff2e30f587
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_master_locator.erl
@@ -0,0 +1,19 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_queue_master_locator).
+
+-behaviour(rabbit_registry_class).
+
+-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).
+
+-callback description() -> [proplists:property()].
+-callback queue_master_location(amqqueue:amqqueue()) ->
+ {'ok', node()} | {'error', term()}.
+
+%% rabbit_registry_class callbacks: queue-master-locator strategies need
+%% no registry bookkeeping on (un)registration.
+added_to_rabbit_registry(_Type, _ModuleName) -> ok.
+removed_from_rabbit_registry(_Type) -> ok.
diff --git a/deps/rabbit/src/rabbit_queue_type.erl b/deps/rabbit/src/rabbit_queue_type.erl
new file mode 100644
index 0000000000..4e59b6a7c0
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_type.erl
@@ -0,0 +1,581 @@
+-module(rabbit_queue_type).
+-include("amqqueue.hrl").
+-include_lib("rabbit_common/include/resource.hrl").
+
+-export([
+ init/0,
+ close/1,
+ discover/1,
+ default/0,
+ is_enabled/1,
+ declare/2,
+ delete/4,
+ is_recoverable/1,
+ recover/2,
+ purge/1,
+ policy_changed/1,
+ stat/1,
+ remove/2,
+ info/2,
+ state_info/1,
+ info_down/2,
+ info_down/3,
+ %% stateful client API
+ new/2,
+ consume/3,
+ cancel/5,
+ handle_down/3,
+ handle_event/3,
+ module/2,
+ deliver/3,
+ settle/5,
+ credit/5,
+ dequeue/5,
+ fold_state/3,
+ is_policy_applicable/2,
+ is_server_named_allowed/1
+ ]).
+
+%% gah what is a good identity of a classic queue including all replicas
+-type queue_name() :: rabbit_types:r(queue).
+-type queue_ref() :: queue_name() | atom().
+-type queue_state() :: term().
+-type msg_tag() :: term().
+
+-define(STATE, ?MODULE).
+
+%% Recoverable slaves shouldn't really be a generic one, but let's keep it here until
+%% mirrored queues are deprecated.
+-define(DOWN_KEYS, [name, durable, auto_delete, arguments, pid, recoverable_slaves, type, state]).
+
+-define(QREF(QueueReference),
+ (is_tuple(QueueReference) andalso element(1, QueueReference) == resource)
+ orelse is_atom(QueueReference)).
+%% anything that the host process needs to do on behalf of the queue type
+%% session, like knowing when to notify on monitor down
+-type action() ::
+ {monitor, Pid :: pid(), queue_ref()} |
+ %% indicate to the queue type module that a message has been delivered
+ %% fully to the queue
+ {settled, Success :: boolean(), [msg_tag()]} |
+ {deliver, rabbit_types:ctag(), boolean(), [rabbit_amqqueue:qmsg()]}.
+
+-type actions() :: [action()].
+
+-type event() ::
+ {down, pid(), Info :: term()} |
+ term().
+
+-record(ctx, {module :: module(),
+ name :: queue_name(),
+ %% "publisher confirm queue accounting"
+ %% queue type implementation should emit a:
+ %% {settle, Success :: boolean(), msg_tag()}
+ %% to either settle or reject the delivery of a
+ %% message to the queue instance
+ %% The queue type module will then emit a {confirm | reject, [msg_tag()}
+ %% action to the channel or channel like process when a msg_tag
+ %% has reached its conclusion
+ state :: queue_state()}).
+
+
+-record(?STATE, {ctxs = #{} :: #{queue_ref() => #ctx{} | queue_ref()},
+ monitor_registry = #{} :: #{pid() => queue_ref()}
+ }).
+
+-opaque state() :: #?STATE{}.
+
+-type consume_spec() :: #{no_ack := boolean(),
+ channel_pid := pid(),
+ limiter_pid => pid(),
+ limiter_active => boolean(),
+ prefetch_count => non_neg_integer(),
+ consumer_tag := rabbit_types:ctag(),
+ exclusive_consume => boolean(),
+ args => rabbit_framing:amqp_table(),
+ ok_msg := term(),
+ acting_user := rabbit_types:username()}.
+
+
+
+% copied from rabbit_amqqueue
+-type absent_reason() :: 'nodedown' | 'crashed' | stopped | timeout.
+
+-type settle_op() :: 'complete' | 'requeue' | 'discard'.
+
+-export_type([state/0,
+ consume_spec/0,
+ action/0,
+ actions/0,
+ settle_op/0]).
+
+%% is the queue type feature enabled
+-callback is_enabled() -> boolean().
+
+-callback declare(amqqueue:amqqueue(), node()) ->
+ {'new' | 'existing' | 'owner_died', amqqueue:amqqueue()} |
+ {'absent', amqqueue:amqqueue(), absent_reason()} |
+ {'protocol_error', Type :: atom(), Reason :: string(), Args :: term()}.
+
+-callback delete(amqqueue:amqqueue(),
+ boolean(),
+ boolean(),
+ rabbit_types:username()) ->
+ rabbit_types:ok(non_neg_integer()) |
+ rabbit_types:error(in_use | not_empty) |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+
+-callback recover(rabbit_types:vhost(), [amqqueue:amqqueue()]) ->
+ {Recovered :: [amqqueue:amqqueue()],
+ Failed :: [amqqueue:amqqueue()]}.
+
+%% checks if the queue should be recovered
+-callback is_recoverable(amqqueue:amqqueue()) ->
+ boolean().
+
+-callback purge(amqqueue:amqqueue()) ->
+ {ok, non_neg_integer()} | {error, term()}.
+
+-callback policy_changed(amqqueue:amqqueue()) -> ok.
+
+%% stateful
+%% intitialise and return a queue type specific session context
+-callback init(amqqueue:amqqueue()) -> queue_state().
+
+-callback close(queue_state()) -> ok.
+%% update the queue type state from amqqrecord
+-callback update(amqqueue:amqqueue(), queue_state()) -> queue_state().
+
+-callback consume(amqqueue:amqqueue(),
+ consume_spec(),
+ queue_state()) ->
+ {ok, queue_state(), actions()} | {error, term()} |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+
+-callback cancel(amqqueue:amqqueue(),
+ rabbit_types:ctag(),
+ term(),
+ rabbit_types:username(),
+ queue_state()) ->
+ {ok, queue_state()} | {error, term()}.
+
+%% any async events returned from the queue system should be processed through
+%% this
+-callback handle_event(Event :: event(),
+ queue_state()) ->
+ {ok, queue_state(), actions()} | {error, term()} | eol |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+
+-callback deliver([{amqqueue:amqqueue(), queue_state()}],
+ Delivery :: term()) ->
+ {[{amqqueue:amqqueue(), queue_state()}], actions()}.
+
+-callback settle(settle_op(), rabbit_types:ctag(), [non_neg_integer()], queue_state()) ->
+ {queue_state(), actions()} |
+ {'protocol_error', Type :: atom(), Reason :: string(), Args :: term()}.
+
+-callback credit(rabbit_types:ctag(),
+ non_neg_integer(), Drain :: boolean(), queue_state()) ->
+ {queue_state(), actions()}.
+
+-callback dequeue(NoAck :: boolean(), LimiterPid :: pid(),
+ rabbit_types:ctag(), queue_state()) ->
+ {ok, Count :: non_neg_integer(), rabbit_amqqueue:qmsg(), queue_state()} |
+ {empty, queue_state()} |
+ {error, term()} |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+
+%% return a map of state summary information
+-callback state_info(queue_state()) ->
+ #{atom() := term()}.
+
+%% general queue info
+-callback info(amqqueue:amqqueue(), all_keys | rabbit_types:info_keys()) ->
+ rabbit_types:infos().
+
+-callback stat(amqqueue:amqqueue()) ->
+ {'ok', non_neg_integer(), non_neg_integer()}.
+
+-callback capabilities() ->
+ #{atom() := term()}.
+
+%% TODO: this should be controlled by a registry that is populated on boot
+%% Map a queue type name (binary, as stored in queue metadata) to the
+%% module implementing rabbit_queue_type for it. Crashes with
+%% function_clause for unknown names; callers must validate first.
+discover(<<"quorum">>) ->
+    rabbit_quorum_queue;
+discover(<<"classic">>) ->
+    rabbit_classic_queue;
+discover(<<"stream">>) ->
+    rabbit_stream_queue.
+
+%% Queue type used when a declaration does not specify one.
+default() ->
+    rabbit_classic_queue.
+
+%% Whether the queue type's backing feature is enabled on this node.
+-spec is_enabled(module()) -> boolean().
+is_enabled(Type) ->
+    Type:is_enabled().
+
+%% Declare Q on Node by delegating to Q's queue-type module.
+-spec declare(amqqueue:amqqueue(), node()) ->
+    {'new' | 'existing' | 'owner_died', amqqueue:amqqueue()} |
+    {'absent', amqqueue:amqqueue(), absent_reason()} |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+declare(Q, Node) ->
+    Mod = amqqueue:get_type(Q),
+    Mod:declare(Q, Node).
+
+%% Delete Q, honouring if-unused / if-empty preconditions; delegates to
+%% the queue-type module.
+-spec delete(amqqueue:amqqueue(), boolean(),
+             boolean(), rabbit_types:username()) ->
+    rabbit_types:ok(non_neg_integer()) |
+    rabbit_types:error(in_use | not_empty) |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+delete(Q, IfUnused, IfEmpty, ActingUser) ->
+    Mod = amqqueue:get_type(Q),
+    Mod:delete(Q, IfUnused, IfEmpty, ActingUser).
+
+%% Purge all ready messages from Q; returns the purged message count.
+-spec purge(amqqueue:amqqueue()) ->
+    {'ok', non_neg_integer()} | {error, term()}.
+purge(Q) ->
+    Mod = amqqueue:get_type(Q),
+    Mod:purge(Q).
+
+%% Notify the queue type that the effective policy for Q changed.
+-spec policy_changed(amqqueue:amqqueue()) -> 'ok'.
+policy_changed(Q) ->
+    Mod = amqqueue:get_type(Q),
+    Mod:policy_changed(Q).
+
+%% {ok, MessageCount, ConsumerCount} for Q, via its queue-type module.
+-spec stat(amqqueue:amqqueue()) ->
+    {'ok', non_neg_integer(), non_neg_integer()}.
+stat(Q) ->
+    Mod = amqqueue:get_type(Q),
+    Mod:stat(Q).
+
+%% Drop any queue-type context stored for QRef; a no-op when none is
+%% stored (maps:remove/2 leaves the map unchanged in that case).
+-spec remove(queue_ref(), state()) -> state().
+remove(QRef, #?STATE{ctxs = Ctxs0} = State) ->
+    State#?STATE{ctxs = maps:remove(QRef, Ctxs0)}.
+
+%% Infos for Q. Crashed/stopped queues are reported via info_down/3,
+%% since their queue-type module can no longer be queried live.
+-spec info(amqqueue:amqqueue(), all_keys | rabbit_types:info_keys()) ->
+    rabbit_types:infos().
+info(Q, Items) when ?amqqueue_state_is(Q, crashed) ->
+    info_down(Q, Items, crashed);
+info(Q, Items) when ?amqqueue_state_is(Q, stopped) ->
+    info_down(Q, Items, stopped);
+info(Q, Items) ->
+    Mod = amqqueue:get_type(Q),
+    Mod:info(Q, Items).
+
+%% Fold Fun over every stored queue context, keyed by queue_ref().
+fold_state(Fun, Acc, #?STATE{ctxs = Ctxs}) ->
+    maps:fold(Fun, Acc, Ctxs).
+
+%% Summary-info map for one context; anything that is not a fully
+%% initialised #ctx{} yields an empty map.
+state_info(#ctx{state = S,
+                module = Mod}) ->
+    Mod:state_info(S);
+state_info(_) ->
+    #{}.
+
+%% Keys reported for queues that are down (see ?DOWN_KEYS).
+down_keys() -> ?DOWN_KEYS.
+
+%% Info proplist for a down (crashed/stopped) queue; DownReason is
+%% surfaced under the 'state' key.
+info_down(Q, DownReason) ->
+    info_down(Q, down_keys(), DownReason).
+
+info_down(Q, all_keys, DownReason) ->
+    info_down(Q, down_keys(), DownReason);
+info_down(Q, Items, DownReason) ->
+    [{Item, i_down(Item, Q, DownReason)} || Item <- Items].
+
+%% Per-item accessor for down-queue info; unrecognised items map to ''.
+i_down(name, Q, _) -> amqqueue:get_name(Q);
+i_down(durable, Q, _) -> amqqueue:is_durable(Q);
+i_down(auto_delete, Q, _) -> amqqueue:is_auto_delete(Q);
+i_down(arguments, Q, _) -> amqqueue:get_arguments(Q);
+i_down(pid, Q, _) -> amqqueue:get_pid(Q);
+i_down(recoverable_slaves, Q, _) -> amqqueue:get_recoverable_slaves(Q);
+i_down(type, Q, _) -> amqqueue:get_type(Q);
+i_down(state, _Q, DownReason) -> DownReason;
+i_down(_K, _Q, _DownReason) -> ''.
+
+%% A policy applies to Q iff every key it sets appears in the queue
+%% type's 'policies' capability list.
+is_policy_applicable(Q, Policy) ->
+    TypeMod = amqqueue:get_type(Q),
+    Supported = maps:get(policies, TypeMod:capabilities(), []),
+    lists:all(fun({Key, _Value}) -> lists:member(Key, Supported) end,
+              Policy).
+
+%% True when the queue type supports server-generated queue names.
+is_server_named_allowed(Type) ->
+    maps:get(server_named, Type:capabilities(), false).
+
+%% Fresh, empty per-channel queue-type session state.
+-spec init() -> state().
+init() ->
+    #?STATE{}.
+
+%% Close every stored queue context, giving each queue-type module a
+%% chance to release its resources; always returns ok.
+-spec close(state()) -> ok.
+close(#?STATE{ctxs = Contexts}) ->
+    ok = maps:fold(
+           fun (_Ref, #ctx{module = Mod, state = QState}, ok) ->
+                   ok = Mod:close(QState)
+           end, ok, Contexts).
+
+%% Ensure a context exists for Q in State, initialising one if needed.
+-spec new(amqqueue:amqqueue(), state()) -> state().
+new(Q, State) when ?is_amqqueue(Q) ->
+    Ctx = get_ctx(Q, State),
+    set_ctx(Q, Ctx, State).
+
+%% Start a consumer on Q according to Spec, threading the queue-type
+%% context state; 'monitor' actions are absorbed by return_ok/2, the
+%% rest are returned to the caller.
+-spec consume(amqqueue:amqqueue(), consume_spec(), state()) ->
+    {ok, state(), actions()} | {error, term()}.
+consume(Q, Spec, State) ->
+    #ctx{state = CtxState0} = Ctx = get_ctx(Q, State),
+    Mod = amqqueue:get_type(Q),
+    case Mod:consume(Q, Spec, CtxState0) of
+        {ok, CtxState, Actions} ->
+            return_ok(set_ctx(Q, Ctx#ctx{state = CtxState}, State), Actions);
+        Err ->
+            Err
+    end.
+
+%% TODO switch to cancel spec api
+%% Cancel consumer Tag on Q (OkMsg is sent to the client on success),
+%% storing the updated queue-type state.
+-spec cancel(amqqueue:amqqueue(),
+             rabbit_types:ctag(),
+             term(),
+             rabbit_types:username(),
+             state()) ->
+    {ok, state()} | {error, term()}.
+cancel(Q, Tag, OkMsg, ActiveUser, Ctxs) ->
+    #ctx{state = State0} = Ctx = get_ctx(Q, Ctxs),
+    Mod = amqqueue:get_type(Q),
+    case Mod:cancel(Q, Tag, OkMsg, ActiveUser, State0) of
+        {ok, State} ->
+            {ok, set_ctx(Q, Ctx#ctx{state = State}, Ctxs)};
+        Err ->
+            Err
+    end.
+
+%% Whether Q should be recovered at boot, per its queue-type module.
+-spec is_recoverable(amqqueue:amqqueue()) ->
+    boolean().
+is_recoverable(Q) ->
+    Mod = amqqueue:get_type(Q),
+    Mod:is_recoverable(Q).
+
+%% Group the vhost's queues by type module and let each module recover
+%% its own, concatenating the per-type {Recovered, Failed} results.
+-spec recover(rabbit_types:vhost(), [amqqueue:amqqueue()]) ->
+    {Recovered :: [amqqueue:amqqueue()],
+     Failed :: [amqqueue:amqqueue()]}.
+recover(VHost, Qs) ->
+    ByType = lists:foldl(
+               fun (Q, Acc) ->
+                       T = amqqueue:get_type(Q),
+                       %% The [Q] initial value makes this safe for queue
+                       %% types not pre-seeded below; the previous
+                       %% maps:update_with/3 crashed with badkey on any
+                       %% other type.
+                       maps:update_with(T, fun (X) -> [Q | X] end, [Q], Acc)
+               %% TODO resolve all registered queue types from registry
+               end, #{rabbit_classic_queue => [],
+                      rabbit_quorum_queue => [],
+                      rabbit_stream_queue => []}, Qs),
+    maps:fold(fun (Mod, Queues, {R0, F0}) ->
+                      {R, F} = Mod:recover(VHost, Queues),
+                      {R0 ++ R, F0 ++ F}
+              end, {[], []}, ByType).
+
+%% Translate a process 'DOWN' for Pid into a queue event for whichever
+%% queue ref registered the monitor; pids with no registered monitor are
+%% ignored.
+-spec handle_down(pid(), term(), state()) ->
+    {ok, state(), actions()} | {eol, queue_ref()} | {error, term()}.
+handle_down(Pid, Info, #?STATE{monitor_registry = Reg0} = State0) ->
+    %% lookup queue ref in monitor registry
+    case maps:take(Pid, Reg0) of
+        {QRef, Reg} ->
+            case handle_event(QRef, {down, Pid, Info}, State0) of
+                {ok, State, Actions} ->
+                    {ok, State#?STATE{monitor_registry = Reg}, Actions};
+                eol ->
+                    {eol, QRef};
+                Err ->
+                    Err
+            end;
+        error ->
+            {ok, State0, []}
+    end.
+
+%% messages sent from queues
+%% Dispatch an async event from queue QRef to its queue-type module and
+%% store the updated context state.
+-spec handle_event(queue_ref(), term(), state()) ->
+    {ok, state(), actions()} | eol | {error, term()} |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+handle_event(QRef, Evt, Ctxs) ->
+    %% events can arrive after a queue state has been cleared up
+    %% so need to be defensive here
+    case get_ctx(QRef, Ctxs, undefined) of
+        #ctx{module = Mod,
+             state = State0} = Ctx ->
+            case Mod:handle_event(Evt, State0) of
+                {ok, State, Actions} ->
+                    return_ok(set_ctx(QRef, Ctx#ctx{state = State}, Ctxs), Actions);
+                Err ->
+                    Err
+            end;
+        undefined ->
+            {ok, Ctxs, []}
+    end.
+
+%% Queue-type module for an already-initialised context, if any.
+-spec module(queue_ref(), state()) ->
+    {ok, module()} | {error, not_found}.
+module(QRef, Ctxs) ->
+    %% events can arrive after a queue state has been cleared up,
+    %% so be defensive here
+    case get_ctx(QRef, Ctxs, undefined) of
+        undefined -> {error, not_found};
+        #ctx{module = Mod} -> {ok, Mod}
+    end.
+
+%% Deliver a message to a set of queues. In 'stateless' mode each queue
+%% type is invoked fire-and-forget (results discarded); otherwise queues
+%% are grouped per type module, each group delivered to in one call, and
+%% the returned per-queue states folded back into State.
+-spec deliver([amqqueue:amqqueue()], Delivery :: term(),
+              stateless | state()) ->
+    {ok, state(), actions()}.
+deliver(Qs, Delivery, stateless) ->
+    _ = lists:map(fun(Q) ->
+                          Mod = amqqueue:get_type(Q),
+                          _ = Mod:deliver([{Q, stateless}], Delivery)
+                  end, Qs),
+    {ok, stateless, []};
+deliver(Qs, Delivery, #?STATE{} = State0) ->
+    %% sort by queue type - then dispatch each group
+    ByType = lists:foldl(
+               fun (Q, Acc) ->
+                       T = amqqueue:get_type(Q),
+                       Ctx = get_ctx(Q, State0),
+                       maps:update_with(
+                         T, fun (A) ->
+                                    [{Q, Ctx#ctx.state} | A]
+                            end, [{Q, Ctx#ctx.state}], Acc)
+               end, #{}, Qs),
+    %%% dispatch each group to queue type interface?
+    {Xs, Actions} = maps:fold(fun(Mod, QSs, {X0, A0}) ->
+                                      {X, A} = Mod:deliver(QSs, Delivery),
+                                      {X0 ++ X, A0 ++ A}
+                              end, {[], []}, ByType),
+    %% fold the per-queue states returned by each type module back into
+    %% the session state
+    State = lists:foldl(
+              fun({Q, S}, Acc) ->
+                      Ctx = get_ctx(Q, Acc),
+                      set_ctx(qref(Q), Ctx#ctx{state = S}, Acc)
+              end, State0, Xs),
+    return_ok(State, Actions).
+
+
+%% Settle (complete/requeue/discard) MsgIds for consumer CTag on QRef.
+%% A missing context is tolerated: the queue may have been deleted while
+%% consumers were still active.
+-spec settle(queue_ref(), settle_op(), rabbit_types:ctag(),
+             [non_neg_integer()], state()) ->
+    {ok, state(), actions()} |
+    {'protocol_error', Type :: atom(), Reason :: string(), Args :: term()}.
+settle(QRef, Op, CTag, MsgIds, Ctxs)
+  when ?QREF(QRef) ->
+    case get_ctx(QRef, Ctxs, undefined) of
+        undefined ->
+            %% if we receive a settlement and there is no queue state it means
+            %% the queue was deleted with active consumers
+            {ok, Ctxs, []};
+        #ctx{state = State0,
+             module = Mod} = Ctx ->
+            case Mod:settle(Op, CTag, MsgIds, State0) of
+                {State, Actions} ->
+                    {ok, set_ctx(QRef, Ctx#ctx{state = State}, Ctxs), Actions};
+                Err ->
+                    Err
+            end
+    end.
+
+%% Grant Credit (optionally draining) to consumer CTag, delegating to
+%% the queue-type module stored in the context. Unlike settle/5 this
+%% exits if no context exists for Q.
+-spec credit(amqqueue:amqqueue() | queue_ref(),
+             rabbit_types:ctag(), non_neg_integer(),
+             boolean(), state()) -> {ok, state(), actions()}.
+credit(Q, CTag, Credit, Drain, Ctxs) ->
+    #ctx{state = State0,
+         module = Mod} = Ctx = get_ctx(Q, Ctxs),
+    {State, Actions} = Mod:credit(CTag, Credit, Drain, State0),
+    {ok, set_ctx(Q, Ctx#ctx{state = State}, Ctxs), Actions}.
+
+%% Synchronous basic.get: fetch a single message from Q, updating the
+%% stored context on both success and 'empty'; error and protocol_error
+%% results are passed through without touching the state.
+-spec dequeue(amqqueue:amqqueue(), boolean(),
+              pid(), rabbit_types:ctag(), state()) ->
+    {ok, non_neg_integer(), term(), state()} |
+    {empty, state()}.
+dequeue(Q, NoAck, LimiterPid, CTag, Ctxs) ->
+    #ctx{state = State0} = Ctx = get_ctx(Q, Ctxs),
+    Mod = amqqueue:get_type(Q),
+    case Mod:dequeue(NoAck, LimiterPid, CTag, State0) of
+        {ok, Num, Msg, State} ->
+            {ok, Num, Msg, set_ctx(Q, Ctx#ctx{state = State}, Ctxs)};
+        {empty, State} ->
+            {empty, set_ctx(Q, Ctx#ctx{state = State}, Ctxs)};
+        {error, _} = Err ->
+            Err;
+        {protocol_error, _, _, _} = Err ->
+            Err
+    end.
+
+%% Fetch the context for Q. Given a full amqqueue record, an existing
+%% context is refreshed via the type module's update/2 and a missing one
+%% is lazily initialised (the caller is responsible for storing it with
+%% set_ctx). Given a bare queue_ref(), the context must already exist.
+get_ctx(Q, #?STATE{ctxs = Contexts}) when ?is_amqqueue(Q) ->
+    Ref = qref(Q),
+    case Contexts of
+        #{Ref := #ctx{module = Mod,
+                      state = State} = Ctx} ->
+            Ctx#ctx{state = Mod:update(Q, State)};
+        _ ->
+            %% not found - initialize
+            Mod = amqqueue:get_type(Q),
+            Name = amqqueue:get_name(Q),
+            #ctx{module = Mod,
+                 name = Name,
+                 state = Mod:init(Q)}
+    end;
+get_ctx(QRef, Contexts) when ?QREF(QRef) ->
+    case get_ctx(QRef, Contexts, undefined) of
+        undefined ->
+            exit({queue_context_not_found, QRef});
+        Ctx ->
+            Ctx
+    end.
+
+%% Context lookup that never initialises: returns Default when QRef has
+%% no (fully initialised) #ctx{} stored.
+get_ctx(QRef, #?STATE{ctxs = Contexts}, Default) ->
+    %% if we use a QRef it should always be initialised
+    case maps:find(qref(QRef), Contexts) of
+        {ok, #ctx{} = Ctx} -> Ctx;
+        _ -> Default
+    end.
+
+%% Store Ctx under the canonical queue_ref() for Q (an amqqueue record
+%% or a queue_ref()). The two original clauses had byte-identical
+%% bodies — qref/1 already canonicalises every accepted input — so they
+%% are merged into one.
+set_ctx(QOrQRef, Ctx, #?STATE{ctxs = Contexts} = State) ->
+    Ref = qref(QOrQRef),
+    State#?STATE{ctxs = maps:put(Ref, Ctx, Contexts)}.
+
+%% Canonical queue reference: the #resource{} queue name. The
+%% queue_ref() type (and the ?QREF guard) also admits atoms, which the
+%% previous version crashed on with function_clause; they now pass
+%% through unchanged.
+qref(#resource{kind = queue} = QName) ->
+    QName;
+qref(Q) when ?is_amqqueue(Q) ->
+    amqqueue:get_name(Q);
+qref(Name) when is_atom(Name) ->
+    Name.
+
+%% Post-process queue-type actions: 'monitor' requests are absorbed into
+%% the monitor registry (exactly one queue ref per monitored pid); all
+%% other actions are passed through to the caller, order preserved.
+return_ok(State0, []) ->
+    {ok, State0, []};
+return_ok(State0, Actions0) ->
+    {State, Actions} =
+        lists:foldl(
+          fun({monitor, Pid, QRef},
+              {#?STATE{monitor_registry = M0} = S0, A0}) ->
+                  case M0 of
+                      #{Pid := QRef} ->
+                          %% already monitored by the qref
+                          {S0, A0};
+                      #{Pid := _} ->
+                          %% TODO: allow multiple Qrefs to monitor the same pid
+                          exit(return_ok_duplicate_monitored_pid);
+                      _ ->
+                          _ = erlang:monitor(process, Pid),
+                          M = M0#{Pid => QRef},
+                          {S0#?STATE{monitor_registry = M}, A0}
+                  end;
+             (Act, {S, A0}) ->
+                  {S, [Act | A0]}
+          end, {State0, []}, Actions0),
+    {ok, State, lists:reverse(Actions)}.
diff --git a/deps/rabbit/src/rabbit_queue_type_util.erl b/deps/rabbit/src/rabbit_queue_type_util.erl
new file mode 100644
index 0000000000..e417cb13c4
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_type_util.erl
@@ -0,0 +1,74 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at https://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is GoPivotal, Inc.
+%% Copyright (c) 2018-2020 Pivotal Software, Inc. All rights reserved.
+%%
+
+-module(rabbit_queue_type_util).
+
+-export([args_policy_lookup/3,
+ qname_to_internal_name/1,
+ check_auto_delete/1,
+ check_exclusive/1,
+ check_non_durable/1,
+ run_checks/2]).
+
+-include("rabbit.hrl").
+-include("amqqueue.hrl").
+
+%% Look Name up both as a policy key and as the queue argument
+%% <<"x-", Name>>; when both are set, Resolve(PolicyVal, ArgVal) decides
+%% which wins. Returns 'undefined' when neither is set.
+args_policy_lookup(Name, Resolve, Q) when ?is_amqqueue(Q) ->
+    Args = amqqueue:get_arguments(Q),
+    AName = <<"x-", Name/binary>>,
+    case {rabbit_policy:get(Name, Q), rabbit_misc:table_lookup(Args, AName)} of
+        {undefined, undefined} -> undefined;
+        {undefined, {_Type, Val}} -> Val;
+        {Val, undefined} -> Val;
+        {PolVal, {_Type, ArgVal}} -> Resolve(PolVal, ArgVal)
+    end.
+
+%% TODO escape hack
+%% Derive an internal atom name from the queue resource ("/" is encoded
+%% as "%2F"; other vhosts are prefixed verbatim).
+%% NOTE(review): binary_to_atom/2 on client-supplied queue names grows
+%% the atom table, which is never garbage-collected — confirm callers
+%% bound the number of distinct names reaching this function.
+qname_to_internal_name(#resource{virtual_host = <<"/">>, name = Name}) ->
+    erlang:binary_to_atom(<<"%2F_", Name/binary>>, utf8);
+qname_to_internal_name(#resource{virtual_host = VHost, name = Name}) ->
+    erlang:binary_to_atom(<<VHost/binary, "_", Name/binary>>, utf8).
+
+%% Reject the 'auto-delete' property; queue types using this check do
+%% not support auto-delete queues.
+check_auto_delete(Q) when ?amqqueue_is_auto_delete(Q) ->
+    QName = amqqueue:get_name(Q),
+    {protocol_error, precondition_failed,
+     "invalid property 'auto-delete' for ~s",
+     [rabbit_misc:rs(QName)]};
+check_auto_delete(_) ->
+    ok.
+
+%% Reject exclusive queues; only queues with no exclusive owner pass.
+check_exclusive(Q) when ?amqqueue_exclusive_owner_is(Q, none) ->
+    ok;
+check_exclusive(Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    {protocol_error, precondition_failed,
+     "invalid property 'exclusive-owner' for ~s",
+     [rabbit_misc:rs(QName)]}.
+
+%% Despite the name, this check *rejects* non-durable queues: queue
+%% types calling it must be durable, so a durable queue passes and a
+%% transient one yields a protocol error.
+check_non_durable(Q) when ?amqqueue_is_durable(Q) ->
+    ok;
+check_non_durable(Q) when not ?amqqueue_is_durable(Q) ->
+    Name = amqqueue:get_name(Q),
+    {protocol_error, precondition_failed, "invalid property 'non-durable' for ~s",
+     [rabbit_misc:rs(Name)]}.
+
+%% Apply each check fun to Q in order; the first non-ok result
+%% short-circuits and is returned as-is.
+run_checks(Checks, Q) when is_list(Checks) ->
+    first_failure(Checks, Q).
+
+first_failure([], _Q) ->
+    ok;
+first_failure([Check | Rest], Q) ->
+    case Check(Q) of
+        ok -> first_failure(Rest, Q);
+        Failure -> Failure
+    end.
diff --git a/deps/rabbit/src/rabbit_quorum_memory_manager.erl b/deps/rabbit/src/rabbit_quorum_memory_manager.erl
new file mode 100644
index 0000000000..94c2ef6b4b
--- /dev/null
+++ b/deps/rabbit/src/rabbit_quorum_memory_manager.erl
@@ -0,0 +1,67 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+-module(rabbit_quorum_memory_manager).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([init/1, handle_call/2, handle_event/2, handle_info/2,
+ terminate/2, code_change/3]).
+-export([register/0, unregister/0]).
+
+-record(state, {last_roll_over,
+ interval}).
+
+-rabbit_boot_step({rabbit_quorum_memory_manager,
+ [{description, "quorum memory manager"},
+ {mfa, {?MODULE, register, []}},
+ {cleanup, {?MODULE, unregister, []}},
+ {requires, rabbit_event},
+ {enables, recovery}]}).
+
+%% Attach this module as a rabbit_alarm gen_event handler (boot step).
+register() ->
+    gen_event:add_handler(rabbit_alarm, ?MODULE, []).
+
+%% Detach the handler again (boot-step cleanup).
+unregister() ->
+    gen_event:delete_handler(rabbit_alarm, ?MODULE, []).
+
+%% gen_event init: no roll-over has happened yet; cache the interval.
+init([]) ->
+    {ok, #state{interval = interval()}}.
+
+%% This handler exposes no synchronous API; reply ok to any call.
+handle_call(_Request, State) ->
+    {ok, ok, State}.
+
+%% On a memory alarm raised for the local node, force a WAL roll-over —
+%% rate-limited to at most once per interval() milliseconds. Alarms for
+%% other nodes/resources are ignored.
+handle_event({set_alarm, {{resource_limit, memory, Node}, []}},
+             #state{last_roll_over = undefined} = State) when Node == node() ->
+    {ok, force_roll_over(State)};
+handle_event({set_alarm, {{resource_limit, memory, Node}, []}},
+             #state{last_roll_over = Last, interval = Interval } = State)
+  when Node == node() ->
+    Now = erlang:system_time(millisecond),
+    case Now > (Last + Interval) of
+        true ->
+            {ok, force_roll_over(State)};
+        false ->
+            {ok, State}
+    end;
+handle_event(_, State) ->
+    {ok, State}.
+
+%% Ignore all other messages delivered to the handler.
+handle_info(_, State) ->
+    {ok, State}.
+
+terminate(_, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%% Ask ra's write-ahead log to roll over (allowing memory held by the
+%% current WAL to be released) and remember when we last did so.
+force_roll_over(State) ->
+    ra_log_wal:force_roll_over(ra_log_wal),
+    State#state{last_roll_over = erlang:system_time(millisecond)}.
+
+%% Minimum milliseconds between forced roll-overs (default 20s).
+interval() ->
+    application:get_env(rabbit, min_wal_roll_over_interval, 20000).
diff --git a/deps/rabbit/src/rabbit_quorum_queue.erl b/deps/rabbit/src/rabbit_quorum_queue.erl
new file mode 100644
index 0000000000..95cc93d728
--- /dev/null
+++ b/deps/rabbit/src/rabbit_quorum_queue.erl
@@ -0,0 +1,1523 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_quorum_queue).
+
+-behaviour(rabbit_queue_type).
+
+-export([init/1,
+ close/1,
+ update/2,
+ handle_event/2]).
+-export([is_recoverable/1, recover/2, stop/1, delete/4, delete_immediately/2]).
+-export([state_info/1, info/2, stat/1, infos/1]).
+-export([settle/4, dequeue/4, consume/3, cancel/5]).
+-export([credit/4]).
+-export([purge/1]).
+-export([stateless_deliver/2, deliver/3, deliver/2]).
+-export([dead_letter_publish/4]).
+-export([queue_name/1]).
+-export([cluster_state/1, status/2]).
+-export([update_consumer_handler/8, update_consumer/9]).
+-export([cancel_consumer_handler/2, cancel_consumer/3]).
+-export([become_leader/2, handle_tick/3, spawn_deleter/1]).
+-export([rpc_delete_metrics/1]).
+-export([format/1]).
+-export([open_files/1]).
+-export([peek/2, peek/3]).
+-export([add_member/4]).
+-export([delete_member/3]).
+-export([requeue/3]).
+-export([policy_changed/1]).
+-export([format_ra_event/3]).
+-export([cleanup_data_dir/0]).
+-export([shrink_all/1,
+ grow/4]).
+-export([transfer_leadership/2, get_replicas/1, queue_length/1]).
+-export([file_handle_leader_reservation/1, file_handle_other_reservation/0]).
+-export([file_handle_release_reservation/0]).
+-export([list_with_minimum_quorum/0, list_with_minimum_quorum_for_cli/0,
+ filter_quorum_critical/1, filter_quorum_critical/2,
+ all_replica_states/0]).
+-export([capabilities/0]).
+-export([repair_amqqueue_nodes/1,
+ repair_amqqueue_nodes/2
+ ]).
+-export([reclaim_memory/2]).
+
+-export([is_enabled/0,
+ declare/2]).
+
+-import(rabbit_queue_type_util, [args_policy_lookup/3,
+ qname_to_internal_name/1]).
+
+-include_lib("stdlib/include/qlc.hrl").
+-include("rabbit.hrl").
+-include("amqqueue.hrl").
+
+-type msg_id() :: non_neg_integer().
+-type qmsg() :: {rabbit_types:r('queue'), pid(), msg_id(), boolean(), rabbit_types:message()}.
+
+-define(STATISTICS_KEYS,
+ [policy,
+ operator_policy,
+ effective_policy_definition,
+ consumers,
+ memory,
+ state,
+ garbage_collection,
+ leader,
+ online,
+ members,
+ open_files,
+ single_active_consumer_pid,
+ single_active_consumer_ctag,
+ messages_ram,
+ message_bytes_ram
+ ]).
+
+-define(INFO_KEYS, [name, durable, auto_delete, arguments, pid, messages, messages_ready,
+ messages_unacknowledged, local_state, type] ++ ?STATISTICS_KEYS).
+
+-define(RPC_TIMEOUT, 1000).
+-define(TICK_TIMEOUT, 5000). %% the ra server tick time
+-define(DELETE_TIMEOUT, 5000).
+-define(ADD_MEMBER_TIMEOUT, 5000).
+
+%%----------- rabbit_queue_type ---------------------------------------------
+
+%% Quorum queues are only available once the corresponding feature flag
+%% has been enabled cluster-wide.
+-spec is_enabled() -> boolean().
+is_enabled() ->
+    rabbit_feature_flags:is_enabled(quorum_queue).
+
+%%----------------------------------------------------------------------------
+
+%% Build a rabbit_fifo client state for the given quorum queue, wiring
+%% credit_flow block/unblock callbacks so publishers are throttled when
+%% the soft command limit is reached.
+-spec init(amqqueue:amqqueue()) -> rabbit_fifo_client:state().
+init(Q) when ?is_amqqueue(Q) ->
+    {ok, SoftLimit} = application:get_env(rabbit, quorum_commands_soft_limit),
+    %% This lookup could potentially return an {error, not_found}, but we do not
+    %% know what to do if the queue has `disappeared`. Let it crash.
+    {Name, _LeaderNode} = Leader = amqqueue:get_pid(Q),
+    Nodes = get_nodes(Q),
+    QName = amqqueue:get_name(Q),
+    %% Ensure the leader is listed first
+    Servers0 = [{Name, N} || N <- Nodes],
+    Servers = [Leader | lists:delete(Leader, Servers0)],
+    rabbit_fifo_client:init(QName, Servers, SoftLimit,
+                            fun() -> credit_flow:block(Name) end,
+                            fun() -> credit_flow:unblock(Name), ok end).
+
+%% Nothing to tear down for a quorum queue client state.
+-spec close(rabbit_fifo_client:state()) -> ok.
+close(_State) ->
+    ok.
+
+%% Called when the amqqueue record changes; the rabbit_fifo client state
+%% does not depend on it.
+-spec update(amqqueue:amqqueue(), rabbit_fifo_client:state()) ->
+    rabbit_fifo_client:state().
+update(Q, State) when ?amqqueue_is_quorum(Q) ->
+    %% QQ state maintains its own updates
+    State.
+
+%% Forward a raw ra event from a queue member into the fifo client state
+%% machine; may yield settle/deliver actions for the channel.
+-spec handle_event({amqqueue:ra_server_id(), any()},
+                   rabbit_fifo_client:state()) ->
+    {ok, rabbit_fifo_client:state(), rabbit_queue_type:actions()} |
+    eol |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+handle_event({From, Evt}, QState) ->
+    rabbit_fifo_client:handle_ra_event(From, Evt, QState).
+
+-spec declare(amqqueue:amqqueue(), node()) ->
+    {new | existing, amqqueue:amqqueue()} |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+%% Validate that the declared properties are compatible with quorum
+%% queues (no auto-delete, no exclusive, must be durable), then start the
+%% backing ra cluster.
+declare(Q, _Node) when ?amqqueue_is_quorum(Q) ->
+    Checks = [fun rabbit_queue_type_util:check_auto_delete/1,
+              fun rabbit_queue_type_util:check_exclusive/1,
+              fun rabbit_queue_type_util:check_non_durable/1],
+    case rabbit_queue_type_util:run_checks(Checks, Q) of
+        ok  -> start_cluster(Q);
+        Err -> Err
+    end.
+
+%% Create the ra cluster backing a new quorum queue: pick the member
+%% nodes, persist the amqqueue record, start one ra server per member and
+%% emit the queue_created event. On ra start failure the amqqueue record
+%% is rolled back and a protocol error returned.
+start_cluster(Q) ->
+    QName = amqqueue:get_name(Q),
+    Durable = amqqueue:is_durable(Q),
+    AutoDelete = amqqueue:is_auto_delete(Q),
+    Arguments = amqqueue:get_arguments(Q),
+    Opts = amqqueue:get_options(Q),
+    ActingUser = maps:get(user, Opts, ?UNKNOWN_USER),
+    QuorumSize = get_default_quorum_initial_group_size(Arguments),
+    RaName = qname_to_internal_name(QName),
+    Id = {RaName, node()},
+    Nodes = select_quorum_nodes(QuorumSize, rabbit_mnesia:cluster_nodes(all)),
+    NewQ0 = amqqueue:set_pid(Q, Id),
+    NewQ1 = amqqueue:set_type_state(NewQ0, #{nodes => Nodes}),
+    case rabbit_amqqueue:internal_declare(NewQ1, false) of
+        {created, NewQ} ->
+            TickTimeout = application:get_env(rabbit, quorum_tick_interval, ?TICK_TIMEOUT),
+            RaConfs = [make_ra_conf(NewQ, ServerId, TickTimeout)
+                       || ServerId <- members(NewQ)],
+            case ra:start_cluster(RaConfs) of
+                {ok, _, _} ->
+                    %% TODO: handle error - what should be done if the
+                    %% config cannot be updated
+                    ok = rabbit_fifo_client:update_machine_state(Id,
+                                                                 ra_machine_config(NewQ)),
+                    %% force a policy change to ensure the latest config is
+                    %% updated even when running the machine version from 0
+                    rabbit_event:notify(queue_created,
+                                        [{name, QName},
+                                         {durable, Durable},
+                                         {auto_delete, AutoDelete},
+                                         {arguments, Arguments},
+                                         {user_who_performed_action,
+                                          ActingUser}]),
+                    {new, NewQ};
+                {error, Error} ->
+                    %% roll back the record created above before reporting
+                    _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
+                    {protocol_error, internal_error,
+                     "Cannot declare a queue '~s' on node '~s': ~255p",
+                     [rabbit_misc:rs(QName), node(), Error]}
+            end;
+        {existing, _} = Ex ->
+            Ex
+    end.
+
+%% Full ra machine specification for this queue: the rabbit_fifo module
+%% plus its per-queue configuration.
+ra_machine(Q) ->
+    Config = ra_machine_config(Q),
+    {module, rabbit_fifo, Config}.
+
+%% Build the rabbit_fifo machine configuration from the queue's arguments
+%% and effective policy. Numeric limits use the stricter (minimum) of
+%% policy and argument; overflow/expires prefer the policy value.
+ra_machine_config(Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    {Name, _} = amqqueue:get_pid(Q),
+    %% take the minimum value of the policy and the queue arg if present
+    MaxLength = args_policy_lookup(<<"max-length">>, fun min/2, Q),
+    %% prefer the policy defined strategy if available
+    Overflow = args_policy_lookup(<<"overflow">>, fun (A, _B) -> A end , Q),
+    MaxBytes = args_policy_lookup(<<"max-length-bytes">>, fun min/2, Q),
+    MaxMemoryLength = args_policy_lookup(<<"max-in-memory-length">>, fun min/2, Q),
+    MaxMemoryBytes = args_policy_lookup(<<"max-in-memory-bytes">>, fun min/2, Q),
+    DeliveryLimit = args_policy_lookup(<<"delivery-limit">>, fun min/2, Q),
+    Expires = args_policy_lookup(<<"expires">>,
+                                 fun (A, _B) -> A end,
+                                 Q),
+    #{name => Name,
+      queue_resource => QName,
+      dead_letter_handler => dlx_mfa(Q),
+      %% notify this module when a member of this queue becomes leader
+      become_leader_handler => {?MODULE, become_leader, [QName]},
+      max_length => MaxLength,
+      max_bytes => MaxBytes,
+      max_in_memory_length => MaxMemoryLength,
+      max_in_memory_bytes => MaxMemoryBytes,
+      single_active_consumer_on => single_active_consumer_on(Q),
+      delivery_limit => DeliveryLimit,
+      overflow_strategy => overflow(Overflow, drop_head, QName),
+      created => erlang:system_time(millisecond),
+      expires => Expires
+     }.
+
+%% True iff the queue was declared with the x-single-active-consumer
+%% argument set to true; any other value (or its absence) disables it.
+single_active_consumer_on(Q) ->
+    Arguments = amqqueue:get_arguments(Q),
+    rabbit_misc:table_lookup(Arguments, <<"x-single-active-consumer">>)
+        =:= {bool, true}.
+
+%% rabbit_fifo callback: propagate a consumer state change to the node
+%% hosting the consumer's channel, where the metrics tables live.
+update_consumer_handler(QName, {ConsumerTag, ChPid}, Exclusive, AckRequired, Prefetch, Active, ActivityStatus, Args) ->
+    local_or_remote_handler(ChPid, rabbit_quorum_queue, update_consumer,
+                            [QName, ChPid, ConsumerTag, Exclusive, AckRequired, Prefetch, Active, ActivityStatus, Args]).
+
+%% Record a consumer update in the core metrics tables on this node.
+%% Best effort: the metrics tables may be absent (e.g. during shutdown),
+%% so any failure is deliberately swallowed. The old-style `catch Expr'
+%% (which conflates throws, errors and exits and loses intent) is
+%% replaced with an explicit try/catch returning ok.
+update_consumer(QName, ChPid, ConsumerTag, Exclusive, AckRequired, Prefetch, Active, ActivityStatus, Args) ->
+    try
+        rabbit_core_metrics:consumer_updated(ChPid, ConsumerTag, Exclusive, AckRequired,
+                                             QName, Prefetch, Active, ActivityStatus, Args)
+    catch
+        _:_ -> ok
+    end.
+
+%% rabbit_fifo callback: propagate a consumer cancellation to the node
+%% hosting the consumer's channel.
+cancel_consumer_handler(QName, {ConsumerTag, ChPid}) ->
+    local_or_remote_handler(ChPid, rabbit_quorum_queue, cancel_consumer,
+                            [QName, ChPid, ConsumerTag]).
+
+%% Delete a consumer's metrics row (best effort -- tables may be absent
+%% during shutdown, so failures are deliberately swallowed; explicit
+%% try/catch replaces the old-style `catch Expr') and emit the
+%% consumer_deleted management event.
+cancel_consumer(QName, ChPid, ConsumerTag) ->
+    try
+        rabbit_core_metrics:consumer_deleted(ChPid, ConsumerTag, QName)
+    catch
+        _:_ -> ok
+    end,
+    emit_consumer_deleted(ChPid, ConsumerTag, QName, ?INTERNAL_USER).
+
+%% Run Module:Function(Args...) on the node that owns ChPid: apply it
+%% directly when that is the local node, otherwise fire an rpc cast.
+local_or_remote_handler(ChPid, Module, Function, Args) ->
+    case node(ChPid) of
+        Node when Node =:= node() ->
+            erlang:apply(Module, Function, Args);
+        RemoteNode ->
+            %% this could potentially block for a while if the node is
+            %% in disconnected state or tcp buffers are full
+            rpc:cast(RemoteNode, Module, Function, Args)
+    end.
+
+%% Invoked by rabbit_fifo when a member of this queue wins a leader
+%% election: repoint the amqqueue record's pid at the new leader and wipe
+%% the queue's metrics rows on the other member nodes.
+become_leader(QName, Name) ->
+    Fun = fun (Q1) ->
+                  amqqueue:set_state(
+                    amqqueue:set_pid(Q1, {Name, node()}),
+                    live)
+          end,
+    %% as this function is called synchronously when a ra node becomes leader
+    %% we need to ensure there is no chance of blocking as else the ra node
+    %% may not be able to establish its leadership
+    spawn(fun() ->
+                  rabbit_misc:execute_mnesia_transaction(
+                    fun() ->
+                            rabbit_amqqueue:update(QName, Fun)
+                    end),
+                  case rabbit_amqqueue:lookup(QName) of
+                      {ok, Q0} when ?is_amqqueue(Q0) ->
+                          Nodes = get_nodes(Q0),
+                          [rpc:call(Node, ?MODULE, rpc_delete_metrics,
+                                    [QName], ?RPC_TIMEOUT)
+                           || Node <- Nodes, Node =/= node()];
+                      _ ->
+                          ok
+                  end
+          end).
+
+%% Snapshot of the local ra_state ETS table: the state (leader/follower/
+%% recover/...) of every ra server on this node, tagged with the node name
+%% so results from several nodes can be merged.
+-spec all_replica_states() -> {node(), #{atom() => atom()}}.
+all_replica_states() ->
+    States = maps:from_list(ets:tab2list(ra_state)),
+    {node(), States}.
+
+-spec list_with_minimum_quorum() -> [amqqueue:amqqueue()].
+%% Locally-hosted quorum queues that are down to their minimum quorum,
+%% i.e. cannot afford to lose another member.
+list_with_minimum_quorum() ->
+    LocalQQs = rabbit_amqqueue:list_local_quorum_queues(),
+    filter_quorum_critical(LocalQQs).
+
+-spec list_with_minimum_quorum_for_cli() -> [#{binary() => term()}].
+%% Same as list_with_minimum_quorum/0, rendered as maps for CLI output.
+list_with_minimum_quorum_for_cli() ->
+    lists:map(
+      fun (Q) ->
+              #resource{name = Name} = QName = amqqueue:get_name(Q),
+              #{
+                <<"readable_name">> => rabbit_data_coercion:to_binary(rabbit_misc:rs(QName)),
+                <<"name">> => Name,
+                <<"virtual_host">> => amqqueue:get_vhost(Q),
+                <<"type">> => <<"quorum">>
+               }
+      end, list_with_minimum_quorum()).
+
+%% Collect replica states from every running node over RPC and keep only
+%% the queues that are at (or below) minimum quorum.
+-spec filter_quorum_critical([amqqueue:amqqueue()]) -> [amqqueue:amqqueue()].
+filter_quorum_critical(Queues) ->
+    %% Example map of QQ replica states:
+    %% #{rabbit@warp10 =>
+    %%       #{'%2F_qq.636' => leader,'%2F_qq.243' => leader,
+    %%         '%2F_qq.1939' => leader,'%2F_qq.1150' => leader,
+    %%         '%2F_qq.1109' => leader,'%2F_qq.1654' => leader,
+    %%         '%2F_qq.1679' => leader,'%2F_qq.1003' => leader,
+    %%         '%2F_qq.1593' => leader,'%2F_qq.1765' => leader,
+    %%         '%2F_qq.933' => leader,'%2F_qq.38' => leader,
+    %%         '%2F_qq.1357' => leader,'%2F_qq.1345' => leader,
+    %%         '%2F_qq.1694' => leader,'%2F_qq.994' => leader,
+    %%         '%2F_qq.490' => leader,'%2F_qq.1704' => leader,
+    %%         '%2F_qq.58' => leader,'%2F_qq.564' => leader,
+    %%         '%2F_qq.683' => leader,'%2F_qq.386' => leader,
+    %%         '%2F_qq.753' => leader,'%2F_qq.6' => leader,
+    %%         '%2F_qq.1590' => leader,'%2F_qq.1363' => leader,
+    %%         '%2F_qq.882' => leader,'%2F_qq.1161' => leader,...}}
+    ReplicaStates = maps:from_list(
+                        rabbit_misc:append_rpc_all_nodes(rabbit_nodes:all_running(),
+                                                         ?MODULE, all_replica_states, [])),
+    filter_quorum_critical(Queues, ReplicaStates).
+
+-spec filter_quorum_critical([amqqueue:amqqueue()], #{node() => #{atom() => atom()}}) -> [amqqueue:amqqueue()].
+%% Keep the queues whose number of healthy members (reported as leader or
+%% follower on their node) is at or below the minimum quorum
+%% (length div 2 + 1). Fix: the previous version redundantly re-matched
+%% `amqqueue:get_pid(Q)' once per member node inside the inner fun; the
+%% ra name is now bound once per queue.
+filter_quorum_critical(Queues, ReplicaStates) ->
+    lists:filter(fun (Q) ->
+                         MemberNodes = rabbit_amqqueue:get_quorum_nodes(Q),
+                         {Name, _Node} = amqqueue:get_pid(Q),
+                         AllUp = lists:filter(fun (N) ->
+                                                      case maps:get(N, ReplicaStates, undefined) of
+                                                          #{Name := State} when State =:= follower orelse State =:= leader ->
+                                                              true;
+                                                          _ -> false
+                                                      end
+                                              end, MemberNodes),
+                         MinQuorum = length(MemberNodes) div 2 + 1,
+                         length(AllUp) =< MinQuorum
+                 end, Queues).
+
+%% Declares which policies, queue arguments and consumer arguments the
+%% quorum queue type supports; used by validation and the management UI.
+capabilities() ->
+    #{policies => [<<"max-length">>, <<"max-length-bytes">>, <<"overflow">>,
+                   <<"expires">>, <<"max-in-memory-length">>, <<"max-in-memory-bytes">>,
+                   <<"delivery-limit">>, <<"dead-letter-exchange">>, <<"dead-letter-routing-key">>],
+      queue_arguments => [<<"x-expires">>, <<"x-dead-letter-exchange">>,
+                          <<"x-dead-letter-routing-key">>, <<"x-max-length">>,
+                          <<"x-max-length-bytes">>, <<"x-max-in-memory-length">>,
+                          <<"x-max-in-memory-bytes">>, <<"x-overflow">>,
+                          <<"x-single-active-consumer">>, <<"x-queue-type">>,
+                          <<"x-quorum-initial-group-size">>, <<"x-delivery-limit">>],
+      consumer_arguments => [<<"x-priority">>, <<"x-credit">>],
+      server_named => false}.
+
+%% Drop this node's metrics rows for a queue; invoked over RPC from the
+%% node that just became leader (see become_leader/2).
+rpc_delete_metrics(QName) ->
+    ets:delete(queue_coarse_metrics, QName),
+    ets:delete(queue_metrics, QName),
+    ok.
+
+%% Delete a queue asynchronously (used for queue expiry, which runs
+%% inside the ra server and therefore must not block).
+spawn_deleter(QName) ->
+    spawn(fun () ->
+                  {ok, Q} = rabbit_amqqueue:lookup(QName),
+                  delete(Q, false, false, <<"expired">>)
+          end).
+
+%% Periodic tick callback from the ra server carrying coarse queue
+%% metrics. Publishes stats, repairs the leader record if stale, and
+%% purges members on nodes that have left the cluster.
+handle_tick(QName,
+            {Name, MR, MU, M, C, MsgBytesReady, MsgBytesUnack},
+            Nodes) ->
+    %% this makes calls to remote processes so cannot be run inside the
+    %% ra server
+    Self = self(),
+    _ = spawn(fun() ->
+                      R = reductions(Name),
+                      rabbit_core_metrics:queue_stats(QName, MR, MU, M, R),
+                      %% consumer utilisation is only meaningful with consumers
+                      Util = case C of
+                                 0 -> 0;
+                                 _ -> rabbit_fifo:usage(Name)
+                             end,
+                      Infos = [{consumers, C},
+                               {consumer_utilisation, Util},
+                               {message_bytes_ready, MsgBytesReady},
+                               {message_bytes_unacknowledged, MsgBytesUnack},
+                               {message_bytes, MsgBytesReady + MsgBytesUnack},
+                               {message_bytes_persistent, MsgBytesReady + MsgBytesUnack},
+                               {messages_persistent, M}
+                               | infos(QName, ?STATISTICS_KEYS -- [consumers])],
+                      rabbit_core_metrics:queue_stats(QName, Infos),
+                      rabbit_event:notify(queue_stats,
+                                          Infos ++ [{name, QName},
+                                                    {messages, M},
+                                                    {messages_ready, MR},
+                                                    {messages_unacknowledged, MU},
+                                                    {reductions, R}]),
+                      ok = repair_leader_record(QName, Self),
+                      ExpectedNodes = rabbit_mnesia:cluster_nodes(all),
+                      case Nodes -- ExpectedNodes of
+                          [] ->
+                              ok;
+                          Stale ->
+                              rabbit_log:info("~s: stale nodes detected. Purging ~w~n",
+                                              [rabbit_misc:rs(QName), Stale]),
+                              %% pipeline purge command
+                              {ok, Q} = rabbit_amqqueue:lookup(QName),
+                              ok = ra:pipeline_command(amqqueue:get_pid(Q),
+                                                       rabbit_fifo:make_purge_nodes(Stale)),
+                              ok
+                      end
+              end),
+    ok.
+
+%% Called from handle_tick on the leader: if the amqqueue record does not
+%% name this node as the leader, re-run the become_leader update to
+%% repoint it (Self is the leader's registered ra process).
+repair_leader_record(QName, Self) ->
+    {ok, Q} = rabbit_amqqueue:lookup(QName),
+    Node = node(),
+    case amqqueue:get_pid(Q) of
+        {_, Node} ->
+            %% it's ok - we don't need to do anything
+            ok;
+        _ ->
+            rabbit_log:debug("~s: repairing leader record",
+                             [rabbit_misc:rs(QName)]),
+            {_, Name} = erlang:process_info(Self, registered_name),
+            become_leader(QName, Name)
+    end,
+    ok.
+
+%% CLI entry point: resolve the queue resource name, then reconcile.
+repair_amqqueue_nodes(VHost, QueueName) ->
+    QName = #resource{virtual_host = VHost, name = QueueName, kind = queue},
+    repair_amqqueue_nodes(QName).
+
+%% Reconcile the amqqueue record's member-node list with the actual ra
+%% cluster membership. Returns `repaired' when the record was updated.
+-spec repair_amqqueue_nodes(rabbit_types:r('queue') | amqqueue:amqqueue()) ->
+    ok | repaired.
+repair_amqqueue_nodes(QName = #resource{}) ->
+    {ok, Q0} = rabbit_amqqueue:lookup(QName),
+    repair_amqqueue_nodes(Q0);
+repair_amqqueue_nodes(Q0) ->
+    QName = amqqueue:get_name(Q0),
+    Leader = amqqueue:get_pid(Q0),
+    {ok, Members, _} = ra:members(Leader),
+    RaNodes = [N || {_, N} <- Members],
+    #{nodes := Nodes} = amqqueue:get_type_state(Q0),
+    case lists:sort(RaNodes) =:= lists:sort(Nodes) of
+        true ->
+            %% up to date
+            ok;
+        false ->
+            %% update amqqueue record
+            Fun = fun (Q) ->
+                          TS0 = amqqueue:get_type_state(Q),
+                          TS = TS0#{nodes => RaNodes},
+                          amqqueue:set_type_state(Q, TS)
+                  end,
+            rabbit_misc:execute_mnesia_transaction(
+              fun() ->
+                      rabbit_amqqueue:update(QName, Fun)
+              end),
+            repaired
+    end.
+
+%% Current reduction count of the locally registered process `Name';
+%% 0 when no such process exists. Fix: the previous implementation only
+%% caught error:badarg (whereis/1 returning undefined), but if the
+%% process dies between whereis/1 and process_info/2 the latter returns
+%% `undefined' and the match raised an uncaught badmatch. Matching on
+%% both outcomes removes the race.
+reductions(Name) ->
+    case whereis(Name) of
+        undefined ->
+            0;
+        Pid ->
+            case process_info(Pid, reductions) of
+                {reductions, R} -> R;
+                undefined -> 0
+            end
+    end.
+
+%% A quorum queue can be recovered on this node iff this node is one of
+%% its configured members.
+is_recoverable(Q) ->
+    lists:member(node(), get_nodes(Q)).
+
+%% Restart (or cold-start) the local ra server of every quorum queue in
+%% the vhost and re-seed the rabbit_queue table. Returns the recovered
+%% and failed queues; failures never prevent the vhost from starting.
+-spec recover(binary(), [amqqueue:amqqueue()]) ->
+    {[amqqueue:amqqueue()], [amqqueue:amqqueue()]}.
+recover(_Vhost, Queues) ->
+    lists:foldl(
+      fun (Q0, {R0, F0}) ->
+              {Name, _} = amqqueue:get_pid(Q0),
+              QName = amqqueue:get_name(Q0),
+              Nodes = get_nodes(Q0),
+              Formatter = {?MODULE, format_ra_event, [QName]},
+              Res = case ra:restart_server({Name, node()},
+                                           #{ra_event_formatter => Formatter}) of
+                        ok ->
+                            % queue was restarted, good
+                            ok;
+                        {error, Err1}
+                          when Err1 == not_started orelse
+                               Err1 == name_not_registered ->
+                            % queue was never started on this node
+                            % so needs to be started from scratch.
+                            Machine = ra_machine(Q0),
+                            RaNodes = [{Name, Node} || Node <- Nodes],
+                            case ra:start_server(Name, {Name, node()}, Machine, RaNodes) of
+                                ok -> ok;
+                                Err2 ->
+                                    rabbit_log:warning("recover: quorum queue ~w could not"
+                                                       " be started ~w", [Name, Err2]),
+                                    fail
+                            end;
+                        {error, {already_started, _}} ->
+                            %% this is fine and can happen if a vhost crashes and performs
+                            %% recovery whilst the ra application and servers are still
+                            %% running
+                            ok;
+                        Err ->
+                            %% catch all clause to avoid causing the vhost not to start
+                            rabbit_log:warning("recover: quorum queue ~w could not be "
+                                               "restarted ~w", [Name, Err]),
+                            fail
+                    end,
+              %% we have to ensure the quorum queue is
+              %% present in the rabbit_queue table and not just in
+              %% rabbit_durable_queue
+              %% So many code paths are dependent on this.
+              {ok, Q} = rabbit_amqqueue:ensure_rabbit_queue_record_is_initialized(Q0),
+              case Res of
+                  ok ->
+                      {[Q | R0], F0};
+                  fail ->
+                      {R0, [Q | F0]}
+              end
+      end, {[], []}, Queues).
+
+%% Stop the local ra server of every quorum queue in the given vhost
+%% (e.g. when the vhost supervisor shuts down).
+-spec stop(rabbit_types:vhost()) -> ok.
+stop(VHost) ->
+    lists:foreach(fun (Q) ->
+                          _ = ra:stop_server(amqqueue:get_pid(Q))
+                  end, find_quorum_queues(VHost)).
+
+%% Delete a quorum queue. The if-unused and if-empty flags are not
+%% supported (consumer/length tracking would be needed cluster-wide).
+%% Falls back to force-deleting individual servers when a clean cluster
+%% delete cannot reach a quorum.
+-spec delete(amqqueue:amqqueue(),
+             boolean(), boolean(),
+             rabbit_types:username()) ->
+    {ok, QLen :: non_neg_integer()} |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+delete(Q, true, _IfEmpty, _ActingUser) when ?amqqueue_is_quorum(Q) ->
+    {protocol_error, not_implemented,
+     "cannot delete ~s. queue.delete operations with if-unused flag set are not supported by quorum queues",
+     [rabbit_misc:rs(amqqueue:get_name(Q))]};
+delete(Q, _IfUnused, true, _ActingUser) when ?amqqueue_is_quorum(Q) ->
+    {protocol_error, not_implemented,
+     "cannot delete ~s. queue.delete operations with if-empty flag set are not supported by quorum queues",
+     [rabbit_misc:rs(amqqueue:get_name(Q))]};
+delete(Q, _IfUnused, _IfEmpty, ActingUser) when ?amqqueue_is_quorum(Q) ->
+    {Name, _} = amqqueue:get_pid(Q),
+    QName = amqqueue:get_name(Q),
+    QNodes = get_nodes(Q),
+    %% TODO Quorum queue needs to support consumer tracking for IfUnused
+    Timeout = ?DELETE_TIMEOUT,
+    {ok, ReadyMsgs, _} = stat(Q),
+    Servers = [{Name, Node} || Node <- QNodes],
+    case ra:delete_cluster(Servers, Timeout) of
+        {ok, {_, LeaderNode} = Leader} ->
+            %% wait for the leader process to actually terminate before
+            %% removing the queue's data; force delete on timeout
+            MRef = erlang:monitor(process, Leader),
+            receive
+                {'DOWN', MRef, process, _, _} ->
+                    ok
+            after Timeout ->
+                    ok = force_delete_queue(Servers)
+            end,
+            ok = delete_queue_data(QName, ActingUser),
+            rpc:call(LeaderNode, rabbit_core_metrics, queue_deleted, [QName],
+                     ?RPC_TIMEOUT),
+            {ok, ReadyMsgs};
+        {error, {no_more_servers_to_try, Errs}} ->
+            case lists:all(fun({{error, noproc}, _}) -> true;
+                              (_) -> false
+                           end, Errs) of
+                true ->
+                    %% If all ra nodes were already down, the delete
+                    %% has succeed
+                    delete_queue_data(QName, ActingUser),
+                    {ok, ReadyMsgs};
+                false ->
+                    %% attempt forced deletion of all servers
+                    rabbit_log:warning(
+                      "Could not delete quorum queue '~s', not enough nodes "
+                      " online to reach a quorum: ~255p."
+                      " Attempting force delete.",
+                      [rabbit_misc:rs(QName), Errs]),
+                    ok = force_delete_queue(Servers),
+                    delete_queue_data(QName, ActingUser),
+                    {ok, ReadyMsgs}
+            end
+    end.
+
+%% Best-effort force delete of each ra server: failures are logged and
+%% ignored so the remaining servers are still attempted. The old-style
+%% `catch(Expr)' (which conflates throws, errors and exits and loses the
+%% stacktrace) is replaced with an explicit try/catch.
+force_delete_queue(Servers) ->
+    lists:foreach(
+      fun (S) ->
+              try ra:force_delete_server(S) of
+                  ok -> ok;
+                  Err ->
+                      rabbit_log:warning(
+                        "Force delete of ~w failed with: ~w"
+                        "This may require manual data clean up~n",
+                        [S, Err])
+              catch
+                  Class:Reason ->
+                      rabbit_log:warning(
+                        "Force delete of ~w failed with: ~w"
+                        "This may require manual data clean up~n",
+                        [S, {Class, Reason}])
+              end
+      end, Servers),
+    ok.
+
+%% Remove the queue's metadata records; the result of internal_delete is
+%% deliberately ignored (the queue may already be gone).
+delete_queue_data(QName, ActingUser) ->
+    _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
+    ok.
+
+%% Delete the queue record and its (single remaining) ra server without
+%% the usual cluster-wide delete protocol.
+delete_immediately(Resource, {_Name, _} = QPid) ->
+    _ = rabbit_amqqueue:internal_delete(Resource, ?INTERNAL_USER),
+    {ok, _} = ra:delete_cluster([QPid]),
+    rabbit_core_metrics:queue_deleted(Resource),
+    ok.
+
+%% Settle outstanding message ids for a consumer: `complete' acks,
+%% `requeue' returns them to the queue, `discard' dead-letters them.
+settle(complete, CTag, MsgIds, QState) ->
+    rabbit_fifo_client:settle(quorum_ctag(CTag), MsgIds, QState);
+settle(requeue, CTag, MsgIds, QState) ->
+    rabbit_fifo_client:return(quorum_ctag(CTag), MsgIds, QState);
+settle(discard, CTag, MsgIds, QState) ->
+    rabbit_fifo_client:discard(quorum_ctag(CTag), MsgIds, QState).
+
+%% Grant message credit to a consumer (AMQP credit-based flow control).
+credit(CTag, Credit, Drain, QState) ->
+    rabbit_fifo_client:credit(quorum_ctag(CTag), Credit, Drain, QState).
+
+-spec dequeue(NoAck :: boolean(), pid(),
+              rabbit_types:ctag(), rabbit_fifo_client:state()) ->
+    {empty, rabbit_fifo_client:state()} |
+    {ok, QLen :: non_neg_integer(), qmsg(), rabbit_fifo_client:state()} |
+    {error, term()}.
+%% basic.get: pop a single message. With NoAck the message is settled
+%% immediately; otherwise the caller must acknowledge it later.
+dequeue(NoAck, _LimiterPid, CTag0, QState0) ->
+    Settlement = case NoAck of
+                     true  -> settled;
+                     false -> unsettled
+                 end,
+    rabbit_fifo_client:dequeue(quorum_ctag(CTag0), Settlement, QState0).
+
+%% Register a consumer (basic.consume) on a quorum queue: issue a
+%% checkout to the ra cluster, then query the single-active-consumer
+%% state to report the consumer's activity status in metrics/events.
+%% Global QoS is rejected as it cannot be enforced by this queue type.
+-spec consume(amqqueue:amqqueue(),
+              rabbit_queue_type:consume_spec(),
+              rabbit_fifo_client:state()) ->
+    {ok, rabbit_fifo_client:state(), rabbit_queue_type:actions()} |
+    {error, global_qos_not_supported_for_queue_type}.
+consume(Q, #{limiter_active := true}, _State)
+  when ?amqqueue_is_quorum(Q) ->
+    {error, global_qos_not_supported_for_queue_type};
+consume(Q, Spec, QState0) when ?amqqueue_is_quorum(Q) ->
+    #{no_ack := NoAck,
+      channel_pid := ChPid,
+      prefetch_count := ConsumerPrefetchCount,
+      consumer_tag := ConsumerTag0,
+      exclusive_consume := ExclusiveConsume,
+      args := Args,
+      ok_msg := OkMsg,
+      acting_user := ActingUser} = Spec,
+    %% TODO: validate consumer arguments
+    %% currently quorum queues do not support any arguments
+    QName = amqqueue:get_name(Q),
+    QPid = amqqueue:get_pid(Q),
+    maybe_send_reply(ChPid, OkMsg),
+    ConsumerTag = quorum_ctag(ConsumerTag0),
+    %% A prefetch count of 0 means no limitation,
+    %% let's make it into something large for ra
+    Prefetch0 = case ConsumerPrefetchCount of
+                    0 -> 2000;
+                    Other -> Other
+                end,
+    %% consumer info is used to describe the consumer properties
+    AckRequired = not NoAck,
+    ConsumerMeta = #{ack => AckRequired,
+                     prefetch => ConsumerPrefetchCount,
+                     args => Args,
+                     username => ActingUser},
+    {CreditMode, Credit, Drain} = parse_credit_args(Prefetch0, Args),
+    %% if the mode is credited we should send a separate credit command
+    %% after checkout and give 0 credits initally
+    Prefetch = case CreditMode of
+                   credited -> 0;
+                   simple_prefetch -> Prefetch0
+               end,
+    {ok, QState1} = rabbit_fifo_client:checkout(ConsumerTag, Prefetch,
+                                                CreditMode, ConsumerMeta,
+                                                QState0),
+    QState = case CreditMode of
+                 credited when Credit > 0 ->
+                     rabbit_fifo_client:credit(ConsumerTag, Credit, Drain,
+                                               QState1);
+                 _ -> QState1
+             end,
+    case ra:local_query(QPid,
+                        fun rabbit_fifo:query_single_active_consumer/1) of
+        {ok, {_, SacResult}, _} ->
+            SingleActiveConsumerOn = single_active_consumer_on(Q),
+            %% without SAC every consumer is active; with SAC only the
+            %% currently elected consumer is, the rest are waiting
+            {IsSingleActiveConsumer, ActivityStatus} = case {SingleActiveConsumerOn, SacResult} of
+                                                           {false, _} ->
+                                                               {true, up};
+                                                           {true, {value, {ConsumerTag, ChPid}}} ->
+                                                               {true, single_active};
+                                                           _ ->
+                                                               {false, waiting}
+                                                       end,
+            rabbit_core_metrics:consumer_created(
+              ChPid, ConsumerTag, ExclusiveConsume,
+              AckRequired, QName,
+              ConsumerPrefetchCount, IsSingleActiveConsumer,
+              ActivityStatus, Args),
+            emit_consumer_created(ChPid, ConsumerTag, ExclusiveConsume,
+                                  AckRequired, QName, Prefetch,
+                                  Args, none, ActingUser),
+            {ok, QState, []};
+        {error, Error} ->
+            Error;
+        {timeout, _} ->
+            {error, timeout}
+    end.
+
+% -spec basic_cancel(rabbit_types:ctag(), ChPid :: pid(), any(), rabbit_fifo_client:state()) ->
+% {'ok', rabbit_fifo_client:state()}.
+
+%% Cancel a consumer (basic.cancel): optionally confirm to the channel,
+%% then remove the checkout from the ra cluster.
+cancel(_Q, ConsumerTag, OkMsg, _ActingUser, State) ->
+    maybe_send_reply(self(), OkMsg),
+    rabbit_fifo_client:cancel_checkout(quorum_ctag(ConsumerTag), State).
+
+%% Publish a consumer_created management event (Ref correlates the event
+%% with an emission request, or is `none').
+emit_consumer_created(ChPid, CTag, Exclusive, AckRequired, QName, PrefetchCount, Args, Ref, ActingUser) ->
+    rabbit_event:notify(consumer_created,
+                        [{consumer_tag, CTag},
+                         {exclusive, Exclusive},
+                         {ack_required, AckRequired},
+                         {channel, ChPid},
+                         {queue, QName},
+                         {prefetch_count, PrefetchCount},
+                         {arguments, Args},
+                         {user_who_performed_action, ActingUser}],
+                        Ref).
+
+%% Publish a consumer_deleted management event.
+emit_consumer_deleted(ChPid, ConsumerTag, QName, ActingUser) ->
+    rabbit_event:notify(consumer_deleted,
+                        [{consumer_tag, ConsumerTag},
+                         {channel, ChPid},
+                         {queue, QName},
+                         {user_who_performed_action, ActingUser}]).
+
+%% Fire-and-forget publish to a queue member without maintaining a fifo
+%% client state (no confirms, no resends).
+-spec stateless_deliver(amqqueue:ra_server_id(), rabbit_types:delivery()) -> 'ok'.
+stateless_deliver(ServerId, Delivery) ->
+    ok = rabbit_fifo_client:untracked_enqueue([ServerId],
+                                              Delivery#delivery.message).
+
+%% Enqueue a delivery through the fifo client. Without publisher
+%% confirms (Confirm =:= false) there is no way to report a rejection
+%% back to the client, so reject_publish is mapped to ok; with confirms
+%% the message sequence number is tracked so it can be confirmed or
+%% rejected later.
+-spec deliver(Confirm :: boolean(), rabbit_types:delivery(),
+              rabbit_fifo_client:state()) ->
+    {ok | slow, rabbit_fifo_client:state()} |
+    {reject_publish, rabbit_fifo_client:state()}.
+deliver(false, Delivery, QState0) ->
+    case rabbit_fifo_client:enqueue(Delivery#delivery.message, QState0) of
+        {ok, _} = Res -> Res;
+        {slow, _} = Res -> Res;
+        {reject_publish, State} ->
+            {ok, State}
+    end;
+deliver(true, Delivery, QState0) ->
+    rabbit_fifo_client:enqueue(Delivery#delivery.msg_seq_no,
+                               Delivery#delivery.message, QState0).
+
+%% Deliver one message to a set of queues. `stateless' targets get an
+%% untracked enqueue; stateful targets go through deliver/3 and any
+%% rejected publish is surfaced as a {rejected, QName, [SeqNo]} action.
+deliver(QSs, #delivery{confirm = Confirm} = Delivery) ->
+    lists:foldl(
+      fun({Q, stateless}, {Qs, Actions}) ->
+              QRef = amqqueue:get_pid(Q),
+              ok = rabbit_fifo_client:untracked_enqueue(
+                     [QRef], Delivery#delivery.message),
+              {Qs, Actions};
+         ({Q, S0}, {Qs, Actions}) ->
+              case deliver(Confirm, Delivery, S0) of
+                  {reject_publish, S} ->
+                      Seq = Delivery#delivery.msg_seq_no,
+                      QName = rabbit_fifo_client:cluster_name(S),
+                      {[{Q, S} | Qs], [{rejected, QName, [Seq]} | Actions]};
+                  {_, S} ->
+                      {[{Q, S} | Qs], Actions}
+              end
+      end, {[], []}, QSs).
+
+
+%% Queue-type specific client state info exposed in channel details.
+state_info(S) ->
+    Pending = rabbit_fifo_client:pending_size(S),
+    #{pending_raft_commands => Pending}.
+
+
+
+%% Statistics for a queue identified by resource name.
+-spec infos(rabbit_types:r('queue')) -> rabbit_types:infos().
+infos(QName) ->
+    infos(QName, ?STATISTICS_KEYS).
+
+%% As infos/1 but for a caller-supplied key list; an unknown queue yields [].
+infos(QName, Keys) ->
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} ->
+            info(Q, Keys);
+        {error, not_found} ->
+            []
+    end.
+
+%% Produce an info proplist for the requested items; `totals' and
+%% `type_specific' expand to multiple entries.
+info(Q, all_keys) ->
+    info(Q, ?INFO_KEYS);
+info(Q, Items) ->
+    lists:foldr(fun(totals, Acc) ->
+                        i_totals(Q) ++ Acc;
+                   (type_specific, Acc) ->
+                        format(Q) ++ Acc;
+                   (Item, Acc) ->
+                        [{Item, i(Item, Q)} | Acc]
+                end, [], Items).
+
+%% Ready/unacknowledged message counts for queue.declare-ok.
+-spec stat(amqqueue:amqqueue()) ->
+    {'ok', non_neg_integer(), non_neg_integer()}.
+stat(Q) when ?is_amqqueue(Q) ->
+    %% same short default timeout as in rabbit_fifo_client:stat/1
+    stat(Q, 250).
+
+%% As stat/1 with an explicit timeout. Any failure (leader unavailable,
+%% cluster in minority, timeout) degrades to {ok, 0, 0} rather than
+%% failing the caller.
+-spec stat(amqqueue:amqqueue(), non_neg_integer()) -> {'ok', non_neg_integer(), non_neg_integer()}.
+stat(Q, Timeout) when ?is_amqqueue(Q) ->
+    Leader = amqqueue:get_pid(Q),
+    try
+        case rabbit_fifo_client:stat(Leader, Timeout) of
+            {ok, _, _} = Success -> Success;
+            {error, _} -> {ok, 0, 0};
+            {timeout, _} -> {ok, 0, 0}
+        end
+    catch
+        _:_ ->
+            %% Leader is not available, cluster might be in minority
+            {ok, 0, 0}
+    end.
+
+%% queue.purge: drop all ready messages, returning how many were removed.
+-spec purge(amqqueue:amqqueue()) ->
+    {ok, non_neg_integer()}.
+purge(Q) when ?is_amqqueue(Q) ->
+    Node = amqqueue:get_pid(Q),
+    rabbit_fifo_client:purge(Node).
+
+%% basic.reject/basic.nack with requeue: return message ids to the queue.
+requeue(ConsumerTag, MsgIds, QState) ->
+    rabbit_fifo_client:return(quorum_ctag(ConsumerTag), MsgIds, QState).
+
+%% Delete on-disk ra data belonging to rabbit_fifo servers that are no
+%% longer members of any quorum queue on this node (e.g. after a member
+%% was removed while the node was down).
+cleanup_data_dir() ->
+    Names = [begin
+                 {Name, _} = amqqueue:get_pid(Q),
+                 Name
+             end
+             || Q <- rabbit_amqqueue:list_by_type(?MODULE),
+                lists:member(node(), get_nodes(Q))],
+    NoQQClusters = rabbit_ra_registry:list_not_quorum_clusters(),
+    Registered = ra_directory:list_registered(),
+    Running = Names ++ NoQQClusters,
+    _ = [maybe_delete_data_dir(UId) || {Name, UId} <- Registered,
+                                       not lists:member(Name, Running)],
+    ok.
+
+%% Remove a ra server's data directory, but only when it really belonged
+%% to a rabbit_fifo machine -- other ra clusters' data is left alone.
+maybe_delete_data_dir(UId) ->
+    Dir = ra_env:server_data_dir(UId),
+    {ok, Config} = ra_log:read_config(Dir),
+    case maps:get(machine, Config) of
+        {module, rabbit_fifo, _} ->
+            ra_lib:recursive_delete(Dir),
+            ra_directory:unregister_name(UId);
+        _ ->
+            ok
+    end.
+
+%% A policy affecting this queue changed: push the recomputed machine
+%% configuration to the ra cluster (result ignored; best effort).
+policy_changed(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    _ = rabbit_fifo_client:update_machine_state(QPid, ra_machine_config(Q)),
+    ok.
+
+%% Coarse health of the local member: `down' if no process is registered
+%% under the ra name, `recovering' while replaying its log, otherwise
+%% `running'.
+-spec cluster_state(Name :: atom()) -> 'down' | 'recovering' | 'running'.
+cluster_state(Name) ->
+    case whereis(Name) of
+        undefined -> down;
+        _ ->
+            case ets:lookup(ra_state, Name) of
+                [{_, recover}] -> recovering;
+                _ -> running
+            end
+    end.
+
+%% CLI status report: one row per member node with raft state, log/commit
+%% /snapshot indexes, term and machine version, gathered via sys:get_status.
+%% Unreachable members get a row with the error in the Raft State column.
+-spec status(rabbit_types:vhost(), Name :: rabbit_misc:resource_name()) ->
+    [[{binary(), term()}]] | {error, term()}.
+status(Vhost, QueueName) ->
+    %% Handle not found queues
+    QName = #resource{virtual_host = Vhost, name = QueueName, kind = queue},
+    RName = qname_to_internal_name(QName),
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} when ?amqqueue_is_classic(Q) ->
+            {error, classic_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_quorum(Q) ->
+            Nodes = get_nodes(Q),
+            [begin
+                 case get_sys_status({RName, N}) of
+                     {ok, Sys} ->
+                         {_, M} = lists:keyfind(ra_server_state, 1, Sys),
+                         {_, RaftState} = lists:keyfind(raft_state, 1, Sys),
+                         #{commit_index := Commit,
+                           machine_version := MacVer,
+                           current_term := Term,
+                           log := #{last_index := Last,
+                                    snapshot_index := SnapIdx}} = M,
+                         [{<<"Node Name">>, N},
+                          {<<"Raft State">>, RaftState},
+                          {<<"Log Index">>, Last},
+                          {<<"Commit Index">>, Commit},
+                          {<<"Snapshot Index">>, SnapIdx},
+                          {<<"Term">>, Term},
+                          {<<"Machine Version">>, MacVer}
+                         ];
+                     {error, Err} ->
+                         [{<<"Node Name">>, N},
+                          {<<"Raft State">>, Err},
+                          {<<"Log Index">>, <<>>},
+                          {<<"Commit Index">>, <<>>},
+                          {<<"Snapshot Index">>, <<>>},
+                          {<<"Term">>, <<>>},
+                          {<<"Machine Version">>, <<>>}
+                         ]
+                 end
+             end || N <- Nodes];
+        {error, not_found} = E ->
+            E
+    end.
+
+%% Extract the ra server's status proplist from sys:get_status/1; the
+%% shape is positional, hence the nth/element digging. Any failure
+%% (noproc, nodedown, timeout, ...) is mapped to an {error, Reason} tag.
+get_sys_status(Proc) ->
+    try lists:nth(5, element(4, sys:get_status(Proc))) of
+        Sys -> {ok, Sys}
+    catch
+        _:Err when is_tuple(Err) ->
+            {error, element(1, Err)};
+        _:_ ->
+            {error, other}
+    end.
+
+
+add_member(VHost, Name, Node, Timeout) ->
+ QName = #resource{virtual_host = VHost, name = Name, kind = queue},
+ case rabbit_amqqueue:lookup(QName) of
+ {ok, Q} when ?amqqueue_is_classic(Q) ->
+ {error, classic_queue_not_supported};
+ {ok, Q} when ?amqqueue_is_quorum(Q) ->
+ QNodes = get_nodes(Q),
+ case lists:member(Node, rabbit_nodes:all_running()) of
+ false ->
+ {error, node_not_running};
+ true ->
+ case lists:member(Node, QNodes) of
+ true ->
+ %% idempotent by design
+ ok;
+ false ->
+ add_member(Q, Node, Timeout)
+ end
+ end;
+ {error, not_found} = E ->
+ E
+ end.
+
+add_member(Q, Node, Timeout) when ?amqqueue_is_quorum(Q) ->
+ {RaName, _} = amqqueue:get_pid(Q),
+ QName = amqqueue:get_name(Q),
+ %% TODO parallel calls might crash this, or add a duplicate in quorum_nodes
+ ServerId = {RaName, Node},
+ Members = members(Q),
+ TickTimeout = application:get_env(rabbit, quorum_tick_interval,
+ ?TICK_TIMEOUT),
+ Conf = make_ra_conf(Q, ServerId, TickTimeout),
+ case ra:start_server(Conf) of
+ ok ->
+ case ra:add_member(Members, ServerId, Timeout) of
+ {ok, _, Leader} ->
+ Fun = fun(Q1) ->
+ Q2 = update_type_state(
+ Q1, fun(#{nodes := Nodes} = Ts) ->
+ Ts#{nodes => [Node | Nodes]}
+ end),
+ amqqueue:set_pid(Q2, Leader)
+ end,
+ rabbit_misc:execute_mnesia_transaction(
+ fun() -> rabbit_amqqueue:update(QName, Fun) end),
+ ok;
+ {timeout, _} ->
+ _ = ra:force_delete_server(ServerId),
+ _ = ra:remove_member(Members, ServerId),
+ {error, timeout};
+ E ->
+ _ = ra:force_delete_server(ServerId),
+ E
+ end;
+ E ->
+ E
+ end.
+
+%% Remove the member (replica) on Node from the quorum queue
+%% VHost/Name. Removing a node that is not a member succeeds
+%% without doing anything (idempotent by design).
+delete_member(VHost, Name, Node) ->
+    QName = #resource{virtual_host = VHost, name = Name, kind = queue},
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} when ?amqqueue_is_classic(Q) ->
+            {error, classic_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_quorum(Q) ->
+            QNodes = get_nodes(Q),
+            case lists:member(Node, QNodes) of
+                false ->
+                    %% idempotent by design
+                    ok;
+                true ->
+                    delete_member(Q, Node)
+            end;
+        {error, not_found} = E ->
+            E
+    end.
+
+
+%% Remove the ra member for this queue on Node, update the queue
+%% record and delete the local ra server. Refuses to remove the very
+%% last member of the cluster.
+delete_member(Q, Node) when ?amqqueue_is_quorum(Q) ->
+    QName = amqqueue:get_name(Q),
+    {RaName, _} = amqqueue:get_pid(Q),
+    ServerId = {RaName, Node},
+    case members(Q) of
+        [{_, Node}] ->
+
+            %% deleting the last member is not allowed
+            {error, last_node};
+        Members ->
+            case ra:remove_member(Members, ServerId) of
+                {ok, _, _Leader} ->
+                    %% Drop the node from the persisted node list.
+                    Fun = fun(Q1) ->
+                                  update_type_state(
+                                    Q1,
+                                    fun(#{nodes := Nodes} = Ts) ->
+                                            Ts#{nodes => lists:delete(Node, Nodes)}
+                                    end)
+                          end,
+                    rabbit_misc:execute_mnesia_transaction(
+                      fun() -> rabbit_amqqueue:update(QName, Fun) end),
+                    %% Best-effort deletion of the server on the target
+                    %% node; treat "node down" and "ra app not running"
+                    %% as success since the member is already gone from
+                    %% the cluster configuration.
+                    case ra:force_delete_server(ServerId) of
+                        ok ->
+                            ok;
+                        {error, {badrpc, nodedown}} ->
+                            ok;
+                        {error, {badrpc, {'EXIT', {badarg, _}}}} ->
+                            %% DETS/ETS tables can't be found, application isn't running
+                            ok;
+                        {error, _} = Err ->
+                            Err;
+                        Err ->
+                            {error, Err}
+                    end;
+                {timeout, _} ->
+                    {error, timeout};
+                E ->
+                    E
+            end
+    end.
+
+%% Remove the member (replica) on Node from every quorum queue that
+%% has one there. Returns, per queue, {ok, NewSize} on success or
+%% {error, OldSize, Reason} on failure. Used when draining a node.
+-spec shrink_all(node()) ->
+    [{rabbit_amqqueue:name(),
+      {ok, pos_integer()} | {error, pos_integer(), term()}}].
+shrink_all(Node) ->
+    [begin
+         QName = amqqueue:get_name(Q),
+         rabbit_log:info("~s: removing member (replica) on node ~w",
+                         [rabbit_misc:rs(QName), Node]),
+         Size = length(get_nodes(Q)),
+         case delete_member(Q, Node) of
+             ok ->
+                 {QName, {ok, Size-1}};
+             {error, Err} ->
+                 rabbit_log:warning("~s: failed to remove member (replica) on node ~w, error: ~w",
+                                    [rabbit_misc:rs(QName), Node, Err]),
+                 {QName, {error, Size, Err}}
+         end
+     end || Q <- rabbit_amqqueue:list(),
+            amqqueue:get_type(Q) == ?MODULE,
+            lists:member(Node, get_nodes(Q))].
+
+%% Add a member (replica) on Node to every quorum queue whose vhost
+%% and name match the given regular expressions and that satisfies
+%% the growth strategy ('all', or 'even' for queues with an even
+%% member count). Queues that already have a member on Node are
+%% skipped; Node must be running. Returns per-queue results like
+%% shrink_all/1.
+-spec grow(node(), binary(), binary(), all | even) ->
+    [{rabbit_amqqueue:name(),
+      {ok, pos_integer()} | {error, pos_integer(), term()}}].
+grow(Node, VhostSpec, QueueSpec, Strategy) ->
+    Running = rabbit_nodes:all_running(),
+    [begin
+         Size = length(get_nodes(Q)),
+         QName = amqqueue:get_name(Q),
+         rabbit_log:info("~s: adding a new member (replica) on node ~w",
+                         [rabbit_misc:rs(QName), Node]),
+         case add_member(Q, Node, ?ADD_MEMBER_TIMEOUT) of
+             ok ->
+                 {QName, {ok, Size + 1}};
+             {error, Err} ->
+                 rabbit_log:warning(
+                   "~s: failed to add member (replica) on node ~w, error: ~w",
+                   [rabbit_misc:rs(QName), Node, Err]),
+                 {QName, {error, Size, Err}}
+         end
+     end
+     || Q <- rabbit_amqqueue:list(),
+        amqqueue:get_type(Q) == ?MODULE,
+        %% don't add a member if there is already one on the node
+        not lists:member(Node, get_nodes(Q)),
+        %% node needs to be running
+        lists:member(Node, Running),
+        matches_strategy(Strategy, get_nodes(Q)),
+        is_match(amqqueue:get_vhost(Q), VhostSpec) andalso
+        is_match(get_resource_name(amqqueue:get_name(Q)), QueueSpec) ].
+
+%% Ask ra to transfer leadership of the queue's cluster to the
+%% Destination node. Returns {migrated, NewLeaderNode} on success
+%% (the new leader is re-read via ra:members/1), or
+%% {not_migrated, Reason} otherwise.
+transfer_leadership(Q, Destination) ->
+    {RaName, _} = Pid = amqqueue:get_pid(Q),
+    case ra:transfer_leadership(Pid, {RaName, Destination}) of
+        ok ->
+            case ra:members(Pid) of
+                {_, _, {_, NewNode}} ->
+                    {migrated, NewNode};
+                {timeout, _} ->
+                    {not_migrated, ra_members_timeout}
+            end;
+        already_leader ->
+            {not_migrated, already_leader};
+        {error, Reason} ->
+            {not_migrated, Reason};
+        {timeout, _} ->
+            %% TODO should we retry once?
+            {not_migrated, timeout}
+    end.
+
+%% Approximate queue length derived from ra log indexes in the
+%% ra_metrics ETS table: entries between the last snapshot index and
+%% the last written index. 0 when no metrics row exists.
+queue_length(Q) ->
+    Name = amqqueue:get_name(Q),
+    case ets:lookup(ra_metrics, Name) of
+        [] -> 0;
+        [{_, _, SnapIdx, _, _, LastIdx, _}] -> LastIdx - SnapIdx
+    end.
+
+%% For quorum queues the replicas are simply the member nodes.
+get_replicas(Q) ->
+    get_nodes(Q).
+
+%% Extract the plain queue name from a #resource{} record.
+get_resource_name(#resource{name = Name}) ->
+    Name.
+
+%% Growth strategy filter used by grow/4: 'all' always matches;
+%% 'even' only matches queues with an even number of members.
+matches_strategy(all, _) -> true;
+matches_strategy(even, Members) ->
+    length(Members) rem 2 == 0.
+
+%% true when the regular expression E matches Subj.
+is_match(Subj, E) ->
+    nomatch /= re:run(Subj, E).
+
+%% Reserve file handles for a queue leader: 2 plus one per cluster
+%% member (segment writer etc. plus follower connections).
+file_handle_leader_reservation(QName) ->
+    {ok, Q} = rabbit_amqqueue:lookup(QName),
+    ClusterSize = length(get_nodes(Q)),
+    file_handle_cache:set_reservation(2 + ClusterSize).
+
+%% Reserve file handles for a non-leader (follower) member.
+file_handle_other_reservation() ->
+    file_handle_cache:set_reservation(2).
+
+%% Release any file handle reservation held by the calling process.
+file_handle_release_reservation() ->
+    file_handle_cache:release_reservation().
+
+%% Ask the queue's ra server to run garbage collection. The command
+%% is pipelined (fire-and-forget), so 'ok' only means it was sent.
+-spec reclaim_memory(rabbit_types:vhost(), Name :: rabbit_misc:resource_name()) -> ok | {error, term()}.
+reclaim_memory(Vhost, QueueName) ->
+    QName = #resource{virtual_host = Vhost, name = QueueName, kind = queue},
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} when ?amqqueue_is_classic(Q) ->
+            {error, classic_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_quorum(Q) ->
+            ok = ra:pipeline_command(amqqueue:get_pid(Q),
+                                     rabbit_fifo:make_garbage_collection());
+        {error, not_found} = E ->
+            E
+    end.
+
+%%----------------------------------------------------------------------------
+%% Build the {M, F, A} used by rabbit_fifo to dead-letter messages:
+%% dead_letter_publish/4 applied to the configured DLX exchange,
+%% routing key and this queue's name. Exchange/key come from queue
+%% arguments or policy (argument wins, see res_arg/2).
+dlx_mfa(Q) ->
+    DLX = init_dlx(args_policy_lookup(<<"dead-letter-exchange">>,
+                                      fun res_arg/2, Q), Q),
+    DLXRKey = args_policy_lookup(<<"dead-letter-routing-key">>,
+                                 fun res_arg/2, Q),
+    {?MODULE, dead_letter_publish, [DLX, DLXRKey, amqqueue:get_name(Q)]}.
+
+%% Resolve the DLX name to an exchange resource in the queue's vhost;
+%% undefined when no DLX is configured.
+init_dlx(undefined, _Q) ->
+    undefined;
+init_dlx(DLX, Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    rabbit_misc:r(QName, exchange, DLX).
+
+%% Conflict resolver for args_policy_lookup: queue argument value
+%% takes precedence over the policy value.
+res_arg(_PolVal, ArgVal) -> ArgVal.
+
+%% Publish dead-lettered messages to exchange X with routing key RK.
+%% A missing DLX (undefined or deleted exchange) silently drops them.
+dead_letter_publish(undefined, _, _, _) ->
+    ok;
+dead_letter_publish(X, RK, QName, ReasonMsgs) ->
+    case rabbit_exchange:lookup(X) of
+        {ok, Exchange} ->
+            [rabbit_dead_letter:publish(Msg, Reason, Exchange, RK, QName)
+             || {Reason, Msg} <- ReasonMsgs];
+        {error, not_found} ->
+            ok
+    end.
+
+%% List all durable quorum queues in VHost whose (leader) node is the
+%% local node, using a dirty qlc query over rabbit_durable_queue.
+find_quorum_queues(VHost) ->
+    Node = node(),
+    mnesia:async_dirty(
+      fun () ->
+              qlc:e(qlc:q([Q || Q <- mnesia:table(rabbit_durable_queue),
+                                ?amqqueue_is_quorum(Q),
+                                amqqueue:get_vhost(Q) =:= VHost,
+                                amqqueue:qnode(Q) == Node]))
+      end).
+
+%% Coarse message totals (ready/unacked/total) for a queue, read from
+%% the queue_coarse_metrics ETS table; zeros when no row exists.
+i_totals(Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    case ets:lookup(queue_coarse_metrics, QName) of
+        [{_, MR, MU, M, _}] ->
+            [{messages_ready, MR},
+             {messages_unacknowledged, MU},
+             {messages, M}];
+        [] ->
+            [{messages_ready, 0},
+             {messages_unacknowledged, 0},
+             {messages, 0}]
+    end.
+
+%% Info item accessor: map an info key to its current value for the
+%% given quorum queue. Values come from the queue record, ETS metric
+%% tables, local queries against the ra server or RPC to the leader
+%% node. Unknown keys yield ''.
+i(name,        Q) when ?is_amqqueue(Q) -> amqqueue:get_name(Q);
+i(durable,     Q) when ?is_amqqueue(Q) -> amqqueue:is_durable(Q);
+i(auto_delete, Q) when ?is_amqqueue(Q) -> amqqueue:is_auto_delete(Q);
+i(arguments,   Q) when ?is_amqqueue(Q) -> amqqueue:get_arguments(Q);
+i(pid, Q) when ?is_amqqueue(Q) ->
+    %% local pid of the registered ra server process, if any
+    {Name, _} = amqqueue:get_pid(Q),
+    whereis(Name);
+i(messages, Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    quorum_messages(QName);
+i(messages_ready, Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    case ets:lookup(queue_coarse_metrics, QName) of
+        [{_, MR, _, _, _}] ->
+            MR;
+        [] ->
+            0
+    end;
+i(messages_unacknowledged, Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    case ets:lookup(queue_coarse_metrics, QName) of
+        [{_, _, MU, _, _}] ->
+            MU;
+        [] ->
+            0
+    end;
+i(policy, Q) ->
+    case rabbit_policy:name(Q) of
+        none   -> '';
+        Policy -> Policy
+    end;
+i(operator_policy, Q) ->
+    case rabbit_policy:name_op(Q) of
+        none   -> '';
+        Policy -> Policy
+    end;
+i(effective_policy_definition, Q) ->
+    case rabbit_policy:effective_definition(Q) of
+        undefined -> [];
+        Def       -> Def
+    end;
+i(consumers, Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    case ets:lookup(queue_metrics, QName) of
+        [{_, M, _}] ->
+            proplists:get_value(consumers, M, 0);
+        [] ->
+            0
+    end;
+i(memory, Q) when ?is_amqqueue(Q) ->
+    {Name, _} = amqqueue:get_pid(Q),
+    try
+        %% badarg when the process is not registered locally
+        {memory, M} = process_info(whereis(Name), memory),
+        M
+    catch
+        error:badarg ->
+            0
+    end;
+i(state, Q) when ?is_amqqueue(Q) ->
+    {Name, Node} = amqqueue:get_pid(Q),
+    %% Check against the leader or last known leader
+    case rpc:call(Node, ?MODULE, cluster_state, [Name], ?RPC_TIMEOUT) of
+        {badrpc, _} -> down;
+        State -> State
+    end;
+i(local_state, Q) when ?is_amqqueue(Q) ->
+    {Name, _} = amqqueue:get_pid(Q),
+    case ets:lookup(ra_state, Name) of
+        [{_, State}] -> State;
+        _ -> not_member
+    end;
+i(garbage_collection, Q) when ?is_amqqueue(Q) ->
+    {Name, _} = amqqueue:get_pid(Q),
+    try
+        rabbit_misc:get_gc_info(whereis(Name))
+    catch
+        error:badarg ->
+            []
+    end;
+i(members, Q) when ?is_amqqueue(Q) ->
+    get_nodes(Q);
+i(online, Q) -> online(Q);
+i(leader, Q) -> leader(Q);
+i(open_files, Q) when ?is_amqqueue(Q) ->
+    %% gather {Node, Count} pairs from every member node
+    {Name, _} = amqqueue:get_pid(Q),
+    Nodes = get_nodes(Q),
+    {Data, _} = rpc:multicall(Nodes, ?MODULE, open_files, [Name]),
+    lists:flatten(Data);
+i(single_active_consumer_pid, Q) when ?is_amqqueue(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    case ra:local_query(QPid, fun rabbit_fifo:query_single_active_consumer/1) of
+        {ok, {_, {value, {_ConsumerTag, ChPid}}}, _} ->
+            ChPid;
+        {ok, _, _} ->
+            '';
+        {error, _} ->
+            '';
+        {timeout, _} ->
+            ''
+    end;
+i(single_active_consumer_ctag, Q) when ?is_amqqueue(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    case ra:local_query(QPid,
+                        fun rabbit_fifo:query_single_active_consumer/1) of
+        {ok, {_, {value, {ConsumerTag, _ChPid}}}, _} ->
+            ConsumerTag;
+        {ok, _, _} ->
+            '';
+        {error, _} ->
+            '';
+        {timeout, _} ->
+            ''
+    end;
+i(type, _) -> quorum;
+i(messages_ram, Q) when ?is_amqqueue(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    case ra:local_query(QPid,
+                        fun rabbit_fifo:query_in_memory_usage/1) of
+        {ok, {_, {Length, _}}, _} ->
+            Length;
+        {error, _} ->
+            0;
+        {timeout, _} ->
+            0
+    end;
+i(message_bytes_ram, Q) when ?is_amqqueue(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    case ra:local_query(QPid,
+                        fun rabbit_fifo:query_in_memory_usage/1) of
+        {ok, {_, {_, Bytes}}, _} ->
+            Bytes;
+        {error, _} ->
+            0;
+        {timeout, _} ->
+            0
+    end;
+i(_K, _Q) -> ''.
+
+%% {Node, OpenFileCount} for the local ra server registered as Name;
+%% 0 when the server is not running here or has no metrics row.
+%% Called over RPC from i(open_files, Q).
+open_files(Name) ->
+    case whereis(Name) of
+        undefined -> {node(), 0};
+        Pid -> case ets:lookup(ra_open_file_metrics, Pid) of
+                   [] -> {node(), 0};
+                   [{_, Count}] -> {node(), Count}
+               end
+    end.
+
+%% Node of the current (or last known) leader, or '' when that node
+%% does not have a live registered process for the queue.
+leader(Q) when ?is_amqqueue(Q) ->
+    {Name, Leader} = amqqueue:get_pid(Q),
+    case is_process_alive(Name, Leader) of
+        true -> Leader;
+        false -> ''
+    end.
+
+%% Peek at the message at position Pos in the queue without
+%% consuming it (convenience wrapper taking vhost + name).
+peek(Vhost, Queue, Pos) ->
+    peek(Pos, rabbit_misc:r(Vhost, queue, Queue)).
+
+%% Peek by resource name or queue record. Uses the ra aux machinery
+%% on the leader; the message is returned formatted with an
+%% x-delivery-count header taken from the message header (0 when the
+%% message was never redelivered).
+peek(Pos, #resource{} = QName) ->
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} ->
+            peek(Pos, Q);
+        Err ->
+            Err
+    end;
+peek(Pos, Q) when ?is_amqqueue(Q) andalso ?amqqueue_is_quorum(Q) ->
+    LeaderPid = amqqueue:get_pid(Q),
+    case ra:aux_command(LeaderPid, {peek, Pos}) of
+        {ok, {MsgHeader, Msg0}} ->
+            Count = case MsgHeader of
+                        #{delivery_count := C} -> C;
+                        _ -> 0
+                    end,
+            Msg = rabbit_basic:add_header(<<"x-delivery-count">>, long,
+                                          Count, Msg0),
+            {ok, rabbit_basic:peek_fmt_message(Msg)};
+        {error, Err} ->
+            {error, Err};
+        Err ->
+            Err
+    end;
+peek(_Pos, Q) when ?is_amqqueue(Q) andalso ?amqqueue_is_classic(Q) ->
+    {error, classic_queue_not_supported}.
+
+%% Member nodes that currently have a live registered ra server
+%% process for this queue.
+online(Q) when ?is_amqqueue(Q) ->
+    Nodes = get_nodes(Q),
+    {Name, _} = amqqueue:get_pid(Q),
+    [Node || Node <- Nodes, is_process_alive(Name, Node)].
+
+%% Summary proplist used in queue listings.
+format(Q) when ?is_amqqueue(Q) ->
+    Nodes = get_nodes(Q),
+    [{members, Nodes}, {online, online(Q)}, {leader, leader(Q)}].
+
+%% true when a process registered as Name exists on Node. An RPC
+%% failure ({badrpc, _}) is not a pid and therefore counts as false.
+is_process_alive(Name, Node) ->
+    erlang:is_pid(rpc:call(Node, erlang, whereis, [Name], ?RPC_TIMEOUT)).
+
+%% Total message count for the queue from queue_coarse_metrics;
+%% 0 when no metrics row exists yet.
+-spec quorum_messages(rabbit_amqqueue:name()) -> non_neg_integer().
+
+quorum_messages(QName) ->
+    case ets:lookup(queue_coarse_metrics, QName) of
+        [{_, _, _, M, _}] ->
+            M;
+        [] ->
+            0
+    end.
+
+%% Consumer tags are binaries on the wire; rabbit_fifo may hand back
+%% integer tags, which are converted here.
+quorum_ctag(Int) when is_integer(Int) ->
+    integer_to_binary(Int);
+quorum_ctag(Other) ->
+    Other.
+
+%% Send a protocol command to the channel unless there is none.
+maybe_send_reply(_ChPid, undefined) -> ok;
+maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg).
+
+%% Queue (cluster) name associated with a rabbit_fifo client state.
+queue_name(RaFifoState) ->
+    rabbit_fifo_client:cluster_name(RaFifoState).
+
+%% Initial replica count for a new quorum queue: the
+%% x-quorum-initial-group-size argument if present, otherwise the
+%% default_quorum_initial_group_size application env value.
+get_default_quorum_initial_group_size(Arguments) ->
+    case rabbit_misc:table_lookup(Arguments, <<"x-quorum-initial-group-size">>) of
+        undefined -> application:get_env(rabbit, default_quorum_initial_group_size);
+        {_Type, Val} -> Val
+    end.
+
+%% Pick Size nodes out of All to host the replicas. The local node is
+%% always preferred when it is a candidate; the rest are chosen at
+%% random. When there are not enough candidates, use them all.
+select_quorum_nodes(Size, All) when length(All) =< Size ->
+    All;
+select_quorum_nodes(Size, All) ->
+    Node = node(),
+    case lists:member(Node, All) of
+        true ->
+            select_quorum_nodes(Size - 1, lists:delete(Node, All), [Node]);
+        false ->
+            select_quorum_nodes(Size, All, [])
+    end.
+
+%% Randomly draw nodes (without replacement) until Size are selected.
+select_quorum_nodes(0, _, Selected) ->
+    Selected;
+select_quorum_nodes(Size, Rest, Selected) ->
+    S = lists:nth(rand:uniform(length(Rest)), Rest),
+    select_quorum_nodes(Size - 1, lists:delete(S, Rest), [S | Selected]).
+
+%% member with the current leader first
+%% All ra server ids ({RaName, Node}) of the queue, with the current
+%% (or last known) leader as head of the list.
+members(Q) when ?amqqueue_is_quorum(Q) ->
+    {RaName, LeaderNode} = amqqueue:get_pid(Q),
+    Nodes = lists:delete(LeaderNode, get_nodes(Q)),
+    [{RaName, N} || N <- [LeaderNode | Nodes]].
+
+%% Wrap a ra event so it is delivered to the queue owner as a
+%% gen cast {queue_event, ...}; installed via ra_event_formatter.
+format_ra_event(ServerId, Evt, QRef) ->
+    {'$gen_cast', {queue_event, QRef, {ServerId, Evt}}}.
+
+%% Build the ra server configuration map for starting (or adding) a
+%% server for this queue: fresh UId, full member list, machine spec,
+%% tick timeout and the event formatter above.
+make_ra_conf(Q, ServerId, TickTimeout) ->
+    QName = amqqueue:get_name(Q),
+    RaMachine = ra_machine(Q),
+    [{ClusterName, _} | _] = Members = members(Q),
+    UId = ra:new_uid(ra_lib:to_binary(ClusterName)),
+    FName = rabbit_misc:rs(QName),
+    Formatter = {?MODULE, format_ra_event, [QName]},
+    #{cluster_name => ClusterName,
+      id => ServerId,
+      uid => UId,
+      friendly_name => FName,
+      metrics_key => QName,
+      initial_members => Members,
+      log_init_args => #{uid => UId},
+      tick_timeout => TickTimeout,
+      machine => RaMachine,
+      ra_event_formatter => Formatter}.
+
+%% Member nodes stored in the queue's type state.
+get_nodes(Q) when ?is_amqqueue(Q) ->
+    #{nodes := Nodes} = amqqueue:get_type_state(Q),
+    Nodes.
+
+%% Apply Fun to the queue's type-state map and store the result.
+update_type_state(Q, Fun) when ?is_amqqueue(Q) ->
+    Ts = amqqueue:get_type_state(Q),
+    amqqueue:set_type_state(Q, Fun(Ts)).
+
+%% Map the x-overflow argument/policy value to the rabbit_fifo
+%% overflow strategy. reject-publish-dlx is not supported for quorum
+%% queues: warn and fall back to the default.
+overflow(undefined, Def, _QName) -> Def;
+overflow(<<"reject-publish">>, _Def, _QName) -> reject_publish;
+overflow(<<"drop-head">>, _Def, _QName) -> drop_head;
+overflow(<<"reject-publish-dlx">> = V, Def, QName) ->
+    rabbit_log:warning("Invalid overflow strategy ~p for quorum queue: ~p",
+                       [V, rabbit_misc:rs(QName)]),
+    Def.
+
+%% Determine consumer credit mode from the x-credit argument:
+%% {credited, Credit, Drain} when a well-formed x-credit table is
+%% given, otherwise simple prefetch with the supplied default.
+parse_credit_args(Default, Args) ->
+    case rabbit_misc:table_lookup(Args, <<"x-credit">>) of
+        {table, T} ->
+            case {rabbit_misc:table_lookup(T, <<"credit">>),
+                  rabbit_misc:table_lookup(T, <<"drain">>)} of
+                {{long, C}, {bool, D}} ->
+                    {credited, C, D};
+                _ ->
+                    {simple_prefetch, Default, false}
+            end;
+        undefined ->
+            {simple_prefetch, Default, false}
+    end.
diff --git a/deps/rabbit/src/rabbit_ra_registry.erl b/deps/rabbit/src/rabbit_ra_registry.erl
new file mode 100644
index 0000000000..b02d89eda5
--- /dev/null
+++ b/deps/rabbit/src/rabbit_ra_registry.erl
@@ -0,0 +1,25 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at https://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is GoPivotal, Inc.
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_ra_registry).
+
+-export([list_not_quorum_clusters/0]).
+
+%% Not all ra clusters are quorum queues. We need to keep a list of these so we don't
+%% take them into account in operations such as memory calculation and data cleanup.
+%% Hardcoded atm
+
+%% Returns the known ra cluster names that are NOT quorum queues.
+%% Currently a hardcoded list; a dynamic registry may replace it.
+list_not_quorum_clusters() ->
+    [rabbit_stream_coordinator].
diff --git a/deps/rabbit/src/rabbit_reader.erl b/deps/rabbit/src/rabbit_reader.erl
new file mode 100644
index 0000000000..c91dbbc105
--- /dev/null
+++ b/deps/rabbit/src/rabbit_reader.erl
@@ -0,0 +1,1803 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_reader).
+
+%% Transitional step until we can require Erlang/OTP 21 and
+%% use the now recommended try/catch syntax for obtaining the stack trace.
+-compile(nowarn_deprecated_function).
+
+%% This is an AMQP 0-9-1 connection implementation. If AMQP 1.0 plugin is enabled,
+%% this module passes control of incoming AMQP 1.0 connections to it.
+%%
+%% Every connection (as in, a process using this module)
+%% is a controlling process for a server socket.
+%%
+%% Connections have a number of responsibilities:
+%%
+%% * Performing protocol handshake
+%% * Parsing incoming data and dispatching protocol methods
+%% * Authenticating clients (with the help of authentication backends)
+%% * Enforcing TCP backpressure (throttling clients)
+%% * Enforcing connection limits, e.g. channel_max
+%% * Channel management
+%% * Setting up heartbeater and alarm notifications
+%% * Emitting connection and network activity metric events
+%% * Gracefully handling client disconnects, channel termination, etc
+%%
+%% and a few more.
+%%
+%% Every connection has
+%%
+%% * a queue collector which is responsible for keeping
+%% track of exclusive queues on the connection and their cleanup.
+%% * a heartbeater that's responsible for sending heartbeat frames to clients,
+%% keeping track of the incoming ones and notifying connection about
+%% heartbeat timeouts
+%% * Stats timer, a timer that is used to periodically emit metric events
+%%
+%% Some dependencies are started under a separate supervisor to avoid deadlocks
+%% during system shutdown. See rabbit_channel_sup:start_link/0 for details.
+%%
+%% Reader processes are special processes (in the OTP sense).
+
+-include("rabbit_framing.hrl").
+-include("rabbit.hrl").
+
+-export([start_link/2, info_keys/0, info/1, info/2, force_event_refresh/2,
+ shutdown/2]).
+
+-export([system_continue/3, system_terminate/4, system_code_change/4]).
+
+-export([init/3, mainloop/4, recvloop/4]).
+
+-export([conserve_resources/3, server_properties/1]).
+
+-define(NORMAL_TIMEOUT, 3).
+-define(CLOSING_TIMEOUT, 30).
+-define(CHANNEL_TERMINATION_TIMEOUT, 3).
+%% we wait for this many seconds before closing TCP connection
+%% with a client that failed to log in. Provides some relief
+%% from connection storms and DoS.
+-define(SILENT_CLOSE_DELAY, 3).
+-define(CHANNEL_MIN, 1).
+
+%%--------------------------------------------------------------------------
+
+-record(v1, {
+ %% parent process
+ parent,
+ %% socket
+ sock,
+ %% connection state, see connection record
+ connection,
+ callback,
+ recv_len,
+ pending_recv,
+ %% pre_init | securing | running | blocking | blocked | closing | closed | {become, F}
+ connection_state,
+ %% see comment in rabbit_connection_sup:start_link/0
+ helper_sup,
+ %% takes care of cleaning up exclusive queues,
+ %% see rabbit_queue_collector
+ queue_collector,
+ %% sends and receives heartbeat frames,
+ %% see rabbit_heartbeat
+ heartbeater,
+ %% timer used to emit statistics
+ stats_timer,
+ %% channel supervisor
+ channel_sup_sup_pid,
+ %% how many channels this connection has
+ channel_count,
+ %% throttling state, for both
+ %% credit- and resource-driven flow control
+ throttle,
+ proxy_socket}).
+
+-record(throttle, {
+ %% never | timestamp()
+ last_blocked_at,
+ %% a set of the reasons why we are
+ %% blocked: {resource, memory}, {resource, disk}.
+ %% More reasons can be added in the future.
+ blocked_by,
+ %% true if received any publishes, false otherwise
+ %% note that this will also be true when connection is
+ %% already blocked
+ should_block,
+             %% true if we have sent a connection.blocked,
+ %% false otherwise
+ connection_blocked_message_sent
+}).
+
+-define(STATISTICS_KEYS, [pid, recv_oct, recv_cnt, send_oct, send_cnt,
+ send_pend, state, channels, reductions,
+ garbage_collection]).
+
+-define(SIMPLE_METRICS, [pid, recv_oct, send_oct, reductions]).
+-define(OTHER_METRICS, [recv_cnt, send_cnt, send_pend, state, channels,
+ garbage_collection]).
+
+-define(CREATION_EVENT_KEYS,
+ [pid, name, port, peer_port, host,
+ peer_host, ssl, peer_cert_subject, peer_cert_issuer,
+ peer_cert_validity, auth_mechanism, ssl_protocol,
+ ssl_key_exchange, ssl_cipher, ssl_hash, protocol, user, vhost,
+ timeout, frame_max, channel_max, client_properties, connected_at,
+ node, user_who_performed_action]).
+
+-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]).
+
+-define(AUTH_NOTIFICATION_INFO_KEYS,
+ [host, name, peer_host, peer_port, protocol, auth_mechanism,
+ ssl, ssl_protocol, ssl_cipher, peer_cert_issuer, peer_cert_subject,
+ peer_cert_validity]).
+
+-define(IS_RUNNING(State),
+ (State#v1.connection_state =:= running orelse
+ State#v1.connection_state =:= blocked)).
+
+-define(IS_STOPPING(State),
+ (State#v1.connection_state =:= closing orelse
+ State#v1.connection_state =:= closed)).
+
+%%--------------------------------------------------------------------------
+
+-type resource_alert() :: {WasAlarmSetForNode :: boolean(),
+ IsThereAnyAlarmsWithSameSourceInTheCluster :: boolean(),
+ NodeForWhichAlarmWasSetOrCleared :: node()}.
+
+%%--------------------------------------------------------------------------
+
+%% Spawn a linked reader process. Ref is the ranch/networking socket
+%% reference handed over during init/3's handshake.
+-spec start_link(pid(), any()) -> rabbit_types:ok(pid()).
+
+start_link(HelperSup, Ref) ->
+    Pid = proc_lib:spawn_link(?MODULE, init, [self(), HelperSup, Ref]),
+
+    {ok, Pid}.
+
+%% Politely shut the connection down with the given explanation.
+-spec shutdown(pid(), string()) -> 'ok'.
+
+shutdown(Pid, Explanation) ->
+    gen_server:call(Pid, {shutdown, Explanation}, infinity).
+
+%% Special-process entry point (proc_lib): take over the socket
+%% (optionally parsing the proxy protocol header) and enter the
+%% connection loop. Never returns normally.
+-spec init(pid(), pid(), any()) -> no_return().
+
+init(Parent, HelperSup, Ref) ->
+    ?LG_PROCESS_TYPE(reader),
+    {ok, Sock} = rabbit_networking:handshake(Ref,
+        application:get_env(rabbit, proxy_protocol, false)),
+    Deb = sys:debug_options([]),
+    start_connection(Parent, HelperSup, Deb, Sock).
+
+%% sys (special process) callbacks: resume the main loop after a
+%% system message, terminate on request, and pass state through on
+%% code change. The suspended state is {Buf, BufLen, #v1{}}.
+-spec system_continue(_,_,{[binary()], non_neg_integer(), #v1{}}) -> any().
+
+system_continue(Parent, Deb, {Buf, BufLen, State}) ->
+    mainloop(Deb, Buf, BufLen, State#v1{parent = Parent}).
+
+-spec system_terminate(_,_,_,_) -> no_return().
+
+system_terminate(Reason, _Parent, _Deb, _State) ->
+    exit(Reason).
+
+-spec system_code_change(_,_,_,_) -> {'ok',_}.
+
+system_code_change(Misc, _Module, _OldVsn, _Extra) ->
+    {ok, Misc}.
+
+%% Keys supported by info/1,2.
+-spec info_keys() -> rabbit_types:info_keys().
+
+info_keys() -> ?INFO_KEYS.
+
+%% Synchronously fetch all info items for the connection.
+-spec info(pid()) -> rabbit_types:infos().
+
+info(Pid) ->
+    gen_server:call(Pid, info, infinity).
+
+%% Fetch only the requested info items; throws on unknown items.
+-spec info(pid(), rabbit_types:info_keys()) -> rabbit_types:infos().
+
+info(Pid, Items) ->
+    case gen_server:call(Pid, {info, Items}, infinity) of
+        {ok, Res}      -> Res;
+        {error, Error} -> throw(Error)
+    end.
+
+-spec force_event_refresh(pid(), reference()) -> 'ok'.
+
+% Note: https://www.pivotaltracker.com/story/show/166962656
+% This event is necessary for the stats timer to be initialized with
+% the correct values once the management agent has started
+force_event_refresh(Pid, Ref) ->
+    gen_server:cast(Pid, {force_event_refresh, Ref}).
+
+%% Resource alarm notification (memory/disk). Only the cluster-wide
+%% "is any alarm active" flag matters for blocking this connection.
+-spec conserve_resources(pid(), atom(), resource_alert()) -> 'ok'.
+
+conserve_resources(Pid, Source, {_, Conserve, _}) ->
+    Pid ! {conserve_resources, Source, Conserve},
+    ok.
+
+%% Build the server-properties table sent in connection.start:
+%% built-in properties (product, version, capabilities, ...) merged
+%% with any configured via the server_properties app env, with
+%% config-provided values winning on key collision.
+-spec server_properties(rabbit_types:protocol()) ->
+          rabbit_framing:amqp_table().
+
+server_properties(Protocol) ->
+    {ok, Product} = application:get_key(rabbit, description),
+    {ok, Version} = application:get_key(rabbit, vsn),
+
+    %% Get any configuration-specified server properties
+    {ok, RawConfigServerProps} = application:get_env(rabbit,
+                                                     server_properties),
+
+    %% Normalize the simplified (2-tuple) and unsimplified (3-tuple) forms
+    %% from the config and merge them with the generated built-in properties
+    NormalizedConfigServerProps =
+        [{<<"capabilities">>, table, server_capabilities(Protocol)} |
+         [case X of
+              {KeyAtom, Value} -> {list_to_binary(atom_to_list(KeyAtom)),
+                                   longstr,
+                                   maybe_list_to_binary(Value)};
+              {BinKey, Type, Value} -> {BinKey, Type, Value}
+          end || X <- RawConfigServerProps ++
+                     [{product,      Product},
+                      {version,      Version},
+                      {cluster_name, rabbit_nodes:cluster_name()},
+                      {platform,     rabbit_misc:platform_and_version()},
+                      {copyright,    ?COPYRIGHT_MESSAGE},
+                      {information,  ?INFORMATION_MESSAGE}]]],
+
+    %% Filter duplicated properties in favour of config file provided values
+    lists:usort(fun ({K1,_,_}, {K2,_,_}) -> K1 =< K2 end,
+                NormalizedConfigServerProps).
+
+%% Coerce a property value to binary (config values may be strings).
+maybe_list_to_binary(V) when is_binary(V) -> V;
+maybe_list_to_binary(V) when is_list(V)   -> list_to_binary(V).
+
+%% Protocol capabilities advertised to clients; only AMQP 0-9-1 has
+%% any, all other protocols get an empty table.
+server_capabilities(rabbit_framing_amqp_0_9_1) ->
+    [{<<"publisher_confirms">>,           bool, true},
+     {<<"exchange_exchange_bindings">>,   bool, true},
+     {<<"basic.nack">>,                   bool, true},
+     {<<"consumer_cancel_notify">>,       bool, true},
+     {<<"connection.blocked">>,           bool, true},
+     {<<"consumer_priorities">>,          bool, true},
+     {<<"authentication_failure_close">>, bool, true},
+     {<<"per_consumer_qos">>,             bool, true},
+     {<<"direct_reply_to">>,              bool, true}];
+server_capabilities(_) ->
+    [].
+
+%%--------------------------------------------------------------------------
+
+%% Log a socket error; inet error atoms get a human-readable form.
+socket_error(Reason) when is_atom(Reason) ->
+    rabbit_log_connection:error("Error on AMQP connection ~p: ~s~n",
+        [self(), rabbit_misc:format_inet_error(Reason)]);
+socket_error(Reason) ->
+    Fmt = "Error on AMQP connection ~p:~n~p~n",
+    Args = [self(), Reason],
+    case Reason of
+        %% The socket was closed while upgrading to SSL.
+        %% This is presumably a TCP healthcheck, so don't log
+        %% it unless specified otherwise.
+        {ssl_upgrade_error, closed} ->
+            %% Lager sinks (rabbit_log_connection)
+            %% are handled by the lager parse_transform.
+            %% Hence have to define the loglevel as a function call.
+            rabbit_log_connection:debug(Fmt, Args);
+        _ ->
+            rabbit_log_connection:error(Fmt, Args)
+    end.
+
+%% Turn {error, Reason} from an inet operation into a thrown error.
+inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F).
+
+%% Run Fun on the socket; on failure log, close the underlying TCP
+%% socket without flushing, and exit normally (no crash report).
+socket_op(Sock, Fun) ->
+    RealSocket = rabbit_net:unwrap_socket(Sock),
+    case Fun(Sock) of
+        {ok, Res}       -> Res;
+        {error, Reason} -> socket_error(Reason),
+                           rabbit_net:fast_close(RealSocket),
+                           exit(normal)
+    end.
+
+%% Initialise the #v1{} connection state, run the receive loop until
+%% the connection terminates, then close the socket and emit the
+%% connection_closed event. Never returns to the caller.
+-spec start_connection(pid(), pid(), any(), rabbit_net:socket()) ->
+          no_return().
+
+start_connection(Parent, HelperSup, Deb, Sock) ->
+    process_flag(trap_exit, true),
+    RealSocket = rabbit_net:unwrap_socket(Sock),
+    %% enotconn here means the peer disconnected before we even got a
+    %% connection string - bail out quietly.
+    Name = case rabbit_net:connection_string(Sock, inbound) of
+               {ok, Str}         -> list_to_binary(Str);
+               {error, enotconn} -> rabbit_net:fast_close(RealSocket),
+                                    exit(normal);
+               {error, Reason}   -> socket_error(Reason),
+                                    rabbit_net:fast_close(RealSocket),
+                                    exit(normal)
+           end,
+    {ok, HandshakeTimeout} = application:get_env(rabbit, handshake_timeout),
+    InitialFrameMax = application:get_env(rabbit, initial_frame_max, ?FRAME_MIN_SIZE),
+    %% abort the handshake if the client is silent for too long
+    erlang:send_after(HandshakeTimeout, self(), handshake_timeout),
+    {PeerHost, PeerPort, Host, Port} =
+        socket_op(Sock, fun (S) -> rabbit_net:socket_ends(S, inbound) end),
+    ?store_proc_name(Name),
+    State = #v1{parent              = Parent,
+                sock                = RealSocket,
+                connection          = #connection{
+                    name               = Name,
+                    log_name           = Name,
+                    host               = Host,
+                    peer_host          = PeerHost,
+                    port               = Port,
+                    peer_port          = PeerPort,
+                    protocol           = none,
+                    user               = none,
+                    timeout_sec        = (HandshakeTimeout / 1000),
+                    frame_max          = InitialFrameMax,
+                    vhost              = none,
+                    client_properties  = none,
+                    capabilities       = [],
+                    auth_mechanism     = none,
+                    auth_state         = none,
+                    connected_at       = os:system_time(
+                                           milli_seconds)},
+                callback            = uninitialized_callback,
+                recv_len            = 0,
+                pending_recv        = false,
+                connection_state    = pre_init,
+                queue_collector     = undefined,  %% started on tune-ok
+                helper_sup          = HelperSup,
+                heartbeater         = none,
+                channel_sup_sup_pid = none,
+                channel_count       = 0,
+                throttle            = #throttle{
+                                         last_blocked_at = never,
+                                         should_block = false,
+                                         blocked_by = sets:new(),
+                                         connection_blocked_message_sent = false
+                                      },
+                proxy_socket = rabbit_net:maybe_get_proxy_socket(Sock)},
+    try
+        %% the first callback reads the 8-byte protocol handshake
+        case run({?MODULE, recvloop,
+                  [Deb, [], 0, switch_callback(rabbit_event:init_stats_timer(
+                                                 State, #v1.stats_timer),
+                                               handshake, 8)]}) of
+            %% connection was closed cleanly by the client
+            #v1{connection = #connection{user  = #user{username = Username},
+                                         vhost = VHost}} ->
+                rabbit_log_connection:info("closing AMQP connection ~p (~s, vhost: '~s', user: '~s')~n",
+                    [self(), dynamic_connection_name(Name), VHost, Username]);
+            %% just to be more defensive
+            _ ->
+                rabbit_log_connection:info("closing AMQP connection ~p (~s)~n",
+                    [self(), dynamic_connection_name(Name)])
+        end
+    catch
+        Ex ->
+            log_connection_exception(dynamic_connection_name(Name), Ex)
+    after
+        %% We don't call gen_tcp:close/1 here since it waits for
+        %% pending output to be sent, which results in unnecessary
+        %% delays. We could just terminate - the reader is the
+        %% controlling process and hence its termination will close
+        %% the socket. However, to keep the file_handle_cache
+        %% accounting as accurate as possible we ought to close the
+        %% socket w/o delay before termination.
+        rabbit_net:fast_close(RealSocket),
+        rabbit_networking:unregister_connection(self()),
+        rabbit_core_metrics:connection_closed(self()),
+        ClientProperties = case get(client_properties) of
+                               undefined ->
+                                   [];
+                               Properties ->
+                                   Properties
+                           end,
+        EventProperties = [{name, Name},
+                           {pid, self()},
+                           {node, node()},
+                           {client_properties, ClientProperties}],
+        EventProperties1 = case get(connection_user_provided_name) of
+                               undefined ->
+                                   EventProperties;
+                               ConnectionUserProvidedName ->
+                                   [{user_provided_name, ConnectionUserProvidedName} | EventProperties]
+                           end,
+        rabbit_event:notify(connection_closed, EventProperties1)
+    end,
+    done.
+
+%% Log a connection-terminating exception at a severity appropriate
+%% to the failure: silent closes are debug, abrupt client closes are
+%% warnings, everything else is an error.
+log_connection_exception(Name, Ex) ->
+    Severity = case Ex of
+                   connection_closed_with_no_data_received -> debug;
+                   {connection_closed_abruptly, _}         -> warning;
+                   connection_closed_abruptly              -> warning;
+                   _                                       -> error
+               end,
+    log_connection_exception(Severity, Name, Ex).
+
+%% Per-exception formatting of the closing log line.
+log_connection_exception(Severity, Name, {heartbeat_timeout, TimeoutSec}) ->
+    %% Long line to avoid extra spaces and line breaks in log
+    log_connection_exception_with_severity(Severity,
+        "closing AMQP connection ~p (~s):~n"
+        "missed heartbeats from client, timeout: ~ps~n",
+        [self(), Name, TimeoutSec]);
+log_connection_exception(Severity, Name, {connection_closed_abruptly,
+                                          #v1{connection = #connection{user = #user{username = Username},
+                                                                       vhost = VHost}}}) ->
+    log_connection_exception_with_severity(Severity,
+        "closing AMQP connection ~p (~s, vhost: '~s', user: '~s'):~nclient unexpectedly closed TCP connection~n",
+        [self(), Name, VHost, Username]);
+%% when client abruptly closes connection before connection.open/authentication/authorization
+%% succeeded, don't log username and vhost as 'none'
+log_connection_exception(Severity, Name, {connection_closed_abruptly, _}) ->
+    log_connection_exception_with_severity(Severity,
+        "closing AMQP connection ~p (~s):~nclient unexpectedly closed TCP connection~n",
+        [self(), Name]);
+%% failed connection.tune negotiations
+log_connection_exception(Severity, Name, {handshake_error, tuning, _Channel,
+                                          {exit, #amqp_error{explanation = Explanation},
+                                           _Method, _Stacktrace}}) ->
+    log_connection_exception_with_severity(Severity,
+        "closing AMQP connection ~p (~s):~nfailed to negotiate connection parameters: ~s~n",
+        [self(), Name, Explanation]);
+%% old exception structure
+log_connection_exception(Severity, Name, connection_closed_abruptly) ->
+    log_connection_exception_with_severity(Severity,
+        "closing AMQP connection ~p (~s):~n"
+        "client unexpectedly closed TCP connection~n",
+        [self(), Name]);
+log_connection_exception(Severity, Name, Ex) ->
+    log_connection_exception_with_severity(Severity,
+        "closing AMQP connection ~p (~s):~n~p~n",
+        [self(), Name, Ex]).
+
+%% Dispatch to the lager sink function matching Severity (lager
+%% requires the level to be a distinct function call).
+log_connection_exception_with_severity(Severity, Fmt, Args) ->
+    case Severity of
+        debug   -> rabbit_log_connection:debug(Fmt, Args);
+        warning -> rabbit_log_connection:warning(Fmt, Args);
+        error   -> rabbit_log_connection:error(Fmt, Args)
+    end.
+
+run({M, F, A}) ->
+ try apply(M, F, A)
+ catch {become, MFA} -> run(MFA)
+ end.
+
%% Buffer-driven parse loop. Buf is a newest-first list of binaries,
%% BufLen their total byte size; State#v1.recv_len is how many bytes
%% the current parser callback (State#v1.callback) needs next.
recvloop(Deb, Buf, BufLen, State = #v1{pending_recv = true}) ->
    %% A socket read is already outstanding: wait for it in mainloop/4.
    mainloop(Deb, Buf, BufLen, State);
recvloop(Deb, Buf, BufLen, State = #v1{connection_state = blocked}) ->
    %% Connection is throttled/blocked: do not consume further input.
    mainloop(Deb, Buf, BufLen, State);
recvloop(Deb, Buf, BufLen, State = #v1{connection_state = {become, F}}) ->
    %% Hand over to another protocol handler; the throw is caught by
    %% run/1, which re-enters via the returned MFA.
    throw({become, F(Deb, Buf, BufLen, State)});
recvloop(Deb, Buf, BufLen, State = #v1{sock = Sock, recv_len = RecvLen})
  when BufLen < RecvLen ->
    %% Not enough buffered bytes for the current callback: ask the
    %% socket for more data ({active, once}) and wait in mainloop/4.
    case rabbit_net:setopts(Sock, [{active, once}]) of
        ok              -> mainloop(Deb, Buf, BufLen,
                                    State#v1{pending_recv = true});
        {error, Reason} -> stop(Reason, State)
    end;
recvloop(Deb, [B], _BufLen, State) ->
    %% Single-chunk fast path: feed the whole binary to the parser and
    %% loop on whatever it did not consume.
    {Rest, State1} = handle_input(State#v1.callback, B, State),
    recvloop(Deb, [Rest], size(Rest), State1);
recvloop(Deb, Buf, BufLen, State = #v1{recv_len = RecvLen}) ->
    %% Multi-chunk path: carve exactly RecvLen bytes (the oldest data)
    %% out of the buffer and feed them to the parser as one binary.
    {DataLRev, RestLRev} = binlist_split(BufLen - RecvLen, Buf, []),
    Data = list_to_binary(lists:reverse(DataLRev)),
    {<<>>, State1} = handle_input(State#v1.callback, Data, State),
    recvloop(Deb, lists:reverse(RestLRev), BufLen - RecvLen, State1).
+
%% Split a list of binary chunks at a byte offset.
%%
%% Transfers whole chunks from the head of L onto Acc until Len bytes
%% have been moved; a chunk straddling the boundary is cut with
%% split_binary/2 so that exactly Len bytes end up transferred.
%% Returns {Rest, Taken}: Rest is what remains of L (with the head
%% part of any split chunk pushed back on), Taken is the transferred
%% chunks in reverse order relative to L. recvloop/4 uses this with a
%% newest-first buffer to separate excess (newest) bytes from the
%% oldest recv_len bytes it is about to parse.
-spec binlist_split(integer(), [binary()], [binary()]) ->
          {[binary()], [binary()]}.
binlist_split(0, L, Acc) ->
    {L, Acc};
binlist_split(Len, L, [Acc0 | Acc]) when Len < 0 ->
    %% Overshot by -Len bytes: return the first -Len bytes of the last
    %% transferred chunk to L and keep only its tail on Acc.
    {H, T} = split_binary(Acc0, -Len),
    {[H | L], [T | Acc]};
binlist_split(Len, [H | T], Acc) ->
    %% byte_size/1 rather than the ambiguous size/1 BIF.
    binlist_split(Len - byte_size(H), T, [H | Acc]).
+
-spec mainloop(_,[binary()], non_neg_integer(), #v1{}) -> any().

%% Blocking receive step of the reader: waits for the next socket event
%% or Erlang message (via rabbit_net:recv/1) and either feeds new data
%% back into recvloop/4, dispatches the message, or stops.
mainloop(Deb, Buf, BufLen, State = #v1{sock = Sock,
                                       connection_state = CS,
                                       connection = #connection{
                                         name = ConnName}}) ->
    Recv = rabbit_net:recv(Sock),
    case CS of
        pre_init when Buf =:= [] ->
            %% We only log incoming connections when either the
            %% first byte was received or there was an error (eg. a
            %% timeout).
            %%
            %% The goal is to not log TCP healthchecks (a connection
            %% with no data received) unless specified otherwise.
            Fmt = "accepting AMQP connection ~p (~s)~n",
            Args = [self(), ConnName],
            case Recv of
                closed -> rabbit_log_connection:debug(Fmt, Args);
                _      -> rabbit_log_connection:info(Fmt, Args)
            end;
        _ ->
            ok
    end,
    case Recv of
        {data, Data} ->
            %% New bytes: prepend (buffer is newest-first) and resume
            %% parsing.
            recvloop(Deb, [Data | Buf], BufLen + size(Data),
                     State#v1{pending_recv = false});
        closed when State#v1.connection_state =:= closed ->
            State;
        closed when CS =:= pre_init andalso Buf =:= [] ->
            stop(tcp_healthcheck, State);
        closed ->
            stop(closed, State);
        {other, {heartbeat_send_error, Reason}} ->
            %% The only portable way to detect disconnect on blocked
            %% connection is to wait for heartbeat send failure.
            stop(Reason, State);
        {error, Reason} ->
            stop(Reason, State);
        {other, {system, From, Request}} ->
            %% sys debug/suspend/resume support; resumes via
            %% system_continue/3 with the saved {Buf, BufLen, State}.
            sys:handle_system_msg(Request, From, State#v1.parent,
                                  ?MODULE, Deb, {Buf, BufLen, State});
        {other, Other} ->
            case handle_other(Other, State) of
                stop     -> State;
                NewState -> recvloop(Deb, Buf, BufLen, NewState)
            end
    end.
+
-spec stop(_, #v1{}) -> no_return().
%% Terminate the reader: emit a final stats event, then throw a reason
%% the top-level handler understands.
stop(Reason, State) ->
    maybe_emit_stats(State),
    case Reason of
        tcp_healthcheck ->
            %% The connection was closed before any packet was
            %% received. It's probably a load-balancer healthcheck:
            %% don't consider this a failure.
            throw(connection_closed_with_no_data_received);
        closed ->
            throw({connection_closed_abruptly, State});
        Other ->
            throw({inet_error, Other})
    end.
+
%% Dispatch a non-socket message received by the reader. Returns a new
%% #v1{} state, the atom 'stop' to end the loop, or exits/throws.
handle_other({conserve_resources, Source, Conserve},
             State = #v1{throttle = Throttle = #throttle{blocked_by = Blockers}}) ->
    %% Resource alarm (e.g. memory/disk) raised or cleared: track the
    %% alarm source in the blocker set and re-evaluate throttling.
    Resource  = {resource, Source},
    Blockers1 = case Conserve of
                    true  -> sets:add_element(Resource, Blockers);
                    false -> sets:del_element(Resource, Blockers)
                end,
    control_throttle(State#v1{throttle = Throttle#throttle{blocked_by = Blockers1}});
handle_other({channel_closing, ChPid}, State) ->
    ok = rabbit_channel:ready_for_close(ChPid),
    {_, State1} = channel_cleanup(ChPid, State),
    maybe_close(control_throttle(State1));
handle_other({'EXIT', Parent, normal}, State = #v1{parent = Parent}) ->
    %% rabbitmq/rabbitmq-server#544
    %% The connection port process has exited due to the TCP socket being closed.
    %% Handle this case in the same manner as receiving {error, closed}
    stop(closed, State);
handle_other({'EXIT', Parent, Reason}, State = #v1{parent = Parent}) ->
    Msg = io_lib:format("broker forced connection closure with reason '~w'", [Reason]),
    terminate(Msg, State),
    %% this is what we are expected to do according to
    %% https://www.erlang.org/doc/man/sys.html
    %%
    %% If we wanted to be *really* nice we should wait for a while for
    %% clients to close the socket at their end, just as we do in the
    %% ordinary error case. However, since this termination is
    %% initiated by our parent it is probably more important to exit
    %% quickly.
    maybe_emit_stats(State),
    exit(Reason);
handle_other({channel_exit, _Channel, E = {writer, send_failed, _E}}, State) ->
    %% Writer could not reach the client: no point sending an error
    %% frame, just die with the writer's reason.
    maybe_emit_stats(State),
    throw(E);
handle_other({channel_exit, Channel, Reason}, State) ->
    handle_exception(State, Channel, Reason);
handle_other({'DOWN', _MRef, process, ChPid, Reason}, State) ->
    handle_dependent_exit(ChPid, Reason, State);
handle_other(terminate_connection, State) ->
    maybe_emit_stats(State),
    stop;
handle_other(handshake_timeout, State)
  when ?IS_RUNNING(State) orelse ?IS_STOPPING(State) ->
    %% Handshake already completed (or we are stopping): stale timer.
    State;
handle_other(handshake_timeout, State) ->
    maybe_emit_stats(State),
    throw({handshake_timeout, State#v1.callback});
handle_other(heartbeat_timeout, State = #v1{connection_state = closed}) ->
    State;
handle_other(heartbeat_timeout,
             State = #v1{connection = #connection{timeout_sec = T}}) ->
    maybe_emit_stats(State),
    throw({heartbeat_timeout, T});
handle_other({'$gen_call', From, {shutdown, Explanation}}, State) ->
    %% Management-initiated close: terminate/2 decides whether we can
    %% wind down gracefully ('normal') or must stop now ('force').
    {ForceTermination, NewState} = terminate(Explanation, State),
    gen_server:reply(From, ok),
    case ForceTermination of
        force  -> stop;
        normal -> NewState
    end;
handle_other({'$gen_call', From, info}, State) ->
    gen_server:reply(From, infos(?INFO_KEYS, State)),
    State;
handle_other({'$gen_call', From, {info, Items}}, State) ->
    gen_server:reply(From, try {ok, infos(Items, State)}
                           catch Error -> {error, Error}
                           end),
    State;
handle_other({'$gen_cast', {force_event_refresh, Ref}}, State)
  when ?IS_RUNNING(State) ->
    %% Re-emit the connection_created event (used after management
    %% plugin restarts) and restart the stats timer.
    rabbit_event:notify(
      connection_created,
      augment_infos_with_user_provided_connection_name(
        [{type, network} | infos(?CREATION_EVENT_KEYS, State)], State),
      Ref),
    rabbit_event:init_stats_timer(State, #v1.stats_timer);
handle_other({'$gen_cast', {force_event_refresh, _Ref}}, State) ->
    %% Ignore, we will emit a created event once we start running.
    State;
handle_other(ensure_stats, State) ->
    ensure_stats_timer(State);
handle_other(emit_stats, State) ->
    emit_stats(State);
handle_other({bump_credit, Msg}, State) ->
    %% Here we are receiving credit by some channel process.
    credit_flow:handle_bump_msg(Msg),
    control_throttle(State);
handle_other(Other, State) ->
    %% internal error -> something worth dying for
    maybe_emit_stats(State),
    exit({unexpected_message, Other}).
+
%% Set the parser continuation: Callback names what handle_input/3
%% should parse next, Length how many buffered bytes it needs before
%% recvloop/4 will invoke it.
switch_callback(State, Callback, Length) ->
    State#v1{callback = Callback, recv_len = Length}.
+
%% Start an orderly shutdown with a textual reason. While running, the
%% reason becomes a connection_forced exception sent to the client and
%% the caller may keep the reader alive until close completes
%% ({normal, State}); otherwise the caller should stop immediately
%% ({force, State}).
terminate(Explanation, State) when ?IS_RUNNING(State) ->
    {normal, handle_exception(State, 0,
                              rabbit_misc:amqp_error(
                                connection_forced, "~s", [Explanation], none))};
terminate(_Explanation, State) ->
    {force, State}.
+
%% Tell the client the connection is now blocked (resource alarm) --
%% but only if it announced the "connection.blocked" capability in its
%% client properties.
send_blocked(#v1{connection = #connection{protocol     = Protocol,
                                          capabilities = Capabilities},
                 sock = Sock}, Reason) ->
    case rabbit_misc:table_lookup(Capabilities, <<"connection.blocked">>) of
        {bool, true} ->
            ok = send_on_channel0(Sock, #'connection.blocked'{reason = Reason},
                                  Protocol);
        _ ->
            ok
    end.
+
%% Counterpart of send_blocked/2: notify the client the connection is
%% unblocked, again only if it declared the "connection.blocked"
%% capability.
send_unblocked(#v1{connection = #connection{protocol     = Protocol,
                                            capabilities = Capabilities},
                   sock = Sock}) ->
    case rabbit_misc:table_lookup(Capabilities, <<"connection.blocked">>) of
        {bool, true} ->
            ok = send_on_channel0(Sock, #'connection.unblocked'{}, Protocol);
        _ ->
            ok
    end.
+
+%%--------------------------------------------------------------------------
+%% error handling / termination
+
%% Move the connection to the 'closed' state: delete this connection's
%% exclusive queues and schedule the final terminate_connection message.
close_connection(State = #v1{queue_collector = Collector,
                             connection = #connection{
                               timeout_sec = TimeoutSec}}) ->
    %% The spec says "Exclusive queues may only be accessed by the
    %% current connection, and are deleted when that connection
    %% closes." This does not strictly imply synchrony, but in
    %% practice it seems to be what people assume.
    clean_up_exclusive_queues(Collector),
    %% We terminate the connection after the specified interval, but
    %% no later than ?CLOSING_TIMEOUT seconds. A timeout_sec of 0 also
    %% falls back to ?CLOSING_TIMEOUT.
    erlang:send_after((if TimeoutSec > 0 andalso
                          TimeoutSec < ?CLOSING_TIMEOUT -> TimeoutSec;
                          true                          -> ?CLOSING_TIMEOUT
                       end) * 1000, self(), terminate_connection),
    State#v1{connection_state = closed}.
+
%% Delete every exclusive queue owned by this connection. The queue
%% collector is 'undefined' when connection tuning was never performed
%% or didn't finish; in that case there is nothing to clean up.
clean_up_exclusive_queues(Collector) ->
    case Collector of
        undefined -> ok;
        _         -> rabbit_queue_collector:delete_all(Collector)
    end.
+
%% A monitored channel process went DOWN. Clean up its bookkeeping and
%% decide -- from whether the pid was a known channel and whether the
%% exit was controlled (reason 'normal') -- whether to carry on, report
%% the error to the client, or crash the reader.
handle_dependent_exit(ChPid, Reason, State) ->
    {Channel, State1} = channel_cleanup(ChPid, State),
    case {Channel, termination_kind(Reason)} of
        {undefined,   controlled} -> State1;
        {undefined, uncontrolled} -> handle_uncontrolled_channel_close(ChPid),
                                     %% Unknown pid dying abnormally is
                                     %% an internal error: crash.
                                     exit({abnormal_dependent_exit,
                                           ChPid, Reason});
        {_,           controlled} -> maybe_close(control_throttle(State1));
        {_,         uncontrolled} -> handle_uncontrolled_channel_close(ChPid),
                                     State2 = handle_exception(
                                                State1, Channel, Reason),
                                     maybe_close(control_throttle(State2))
    end.
+
%% Ask every channel to shut down and wait for them all to terminate.
%% The overall wait is bounded by a timer that scales with the number
%% of channels.
terminate_channels(#v1{channel_count = 0} = State) ->
    State;
terminate_channels(#v1{channel_count = ChannelCount} = State) ->
    lists:foreach(fun rabbit_channel:shutdown/1, all_channels()),
    Timeout = 1000 * ?CHANNEL_TERMINATION_TIMEOUT * ChannelCount,
    TimerRef = erlang:send_after(Timeout, self(), cancel_wait),
    wait_for_channel_termination(ChannelCount, TimerRef, State).
+
%% Wait for N channel 'DOWN' messages (started by terminate_channels/1),
%% bailing out if the cancel_wait timer fires or the socket dies.
wait_for_channel_termination(0, TimerRef, State) ->
    %% All channels are gone; cancel the timer, and if it already
    %% fired, drain the pending cancel_wait message from the mailbox.
    case erlang:cancel_timer(TimerRef) of
        false -> receive
                     cancel_wait -> State
                 end;
        _     -> State
    end;
wait_for_channel_termination(N, TimerRef,
                             State = #v1{connection_state = CS,
                                         connection = #connection{
                                           log_name  = ConnName,
                                           user      = User,
                                           vhost     = VHost},
                                         sock = Sock}) ->
    receive
        {'DOWN', _MRef, process, ChPid, Reason} ->
            {Channel, State1} = channel_cleanup(ChPid, State),
            case {Channel, termination_kind(Reason)} of
                {undefined, _} ->
                    exit({abnormal_dependent_exit, ChPid, Reason});
                {_, controlled} ->
                    wait_for_channel_termination(N-1, TimerRef, State1);
                {_, uncontrolled} ->
                    %% Channel crashed during shutdown: log it but keep
                    %% waiting for the rest.
                    rabbit_log_connection:error(
                        "Error on AMQP connection ~p (~s, vhost: '~s',"
                        " user: '~s', state: ~p), channel ~p:"
                        "error while terminating:~n~p~n",
                        [self(), ConnName, VHost, User#user.username,
                         CS, Channel, Reason]),
                    handle_uncontrolled_channel_close(ChPid),
                    wait_for_channel_termination(N-1, TimerRef, State1)
            end;
        {'EXIT', Sock, _Reason} ->
            %% Socket went away: nothing left to say to the client.
            clean_up_all_channels(State),
            exit(normal);
        cancel_wait ->
            exit(channel_termination_timeout)
    end.
+
%% Once a closing connection has no channels left, send
%% connection.close_ok and enter the 'closed' state; otherwise leave
%% the state untouched.
maybe_close(State = #v1{connection_state = closing,
                        channel_count = 0,
                        connection = #connection{protocol = Protocol},
                        sock = Sock}) ->
    NewState = close_connection(State),
    ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol),
    NewState;
maybe_close(State) ->
    State.
+
%% Classify a process exit reason: 'normal' means an orderly shutdown
%% ('controlled'); any other reason is 'uncontrolled'.
termination_kind(Reason) ->
    case Reason of
        normal -> controlled;
        _Other -> uncontrolled
    end.
+
%% Render a connection-level error reason as printable text: AMQP
%% errors get a structured description; anything that is already a
%% (deep) character list passes through; everything else is
%% ~p-formatted.
format_hard_error(#amqp_error{name = N, explanation = E, method = M}) ->
    io_lib:format("operation ~s caused a connection exception ~s: ~p", [M, N, E]);
format_hard_error(Reason) ->
    case io_lib:deep_char_list(Reason) of
        true  -> Reason;
        false -> rabbit_misc:format("~p", [Reason])
    end.
+
%% Log a connection-level error with full connection context (name,
%% vhost, user, state and channel number).
log_hard_error(#v1{connection_state = CS,
                   connection = #connection{
                     log_name = ConnName,
                     user  = User,
                     vhost = VHost}}, Channel, Reason) ->
    rabbit_log_connection:error(
        "Error on AMQP connection ~p (~s, vhost: '~s',"
        " user: '~s', state: ~p), channel ~p:~n ~s~n",
        [self(), ConnName, VHost, User#user.username, CS, Channel, format_hard_error(Reason)]).
+
%% Handle an error according to the connection state machine: log it,
%% possibly send an error to the client on channel 0, and wind the
%% connection down. Clause order encodes the state-specific policies.
handle_exception(State = #v1{connection_state = closed}, Channel, Reason) ->
    %% Already closed: nothing to send, just log.
    log_hard_error(State, Channel, Reason),
    State;
handle_exception(State = #v1{connection = #connection{protocol = Protocol},
                             connection_state = CS},
                 Channel, Reason)
  when ?IS_RUNNING(State) orelse CS =:= closing ->
    respond_and_close(State, Channel, Protocol, Reason, Reason);
%% authentication failure
handle_exception(State = #v1{connection = #connection{protocol = Protocol,
                                                      log_name = ConnName,
                                                      capabilities = Capabilities},
                             connection_state = starting},
                 Channel, Reason = #amqp_error{name = access_refused,
                                               explanation = ErrMsg}) ->
    rabbit_log_connection:error(
        "Error on AMQP connection ~p (~s, state: ~p):~n~s~n",
        [self(), ConnName, starting, ErrMsg]),
    %% respect authentication failure notification capability
    case rabbit_misc:table_lookup(Capabilities,
                                  <<"authentication_failure_close">>) of
        {bool, true} ->
            send_error_on_channel0_and_close(Channel, Protocol, Reason, State);
        _ ->
            close_connection(terminate_channels(State))
    end;
%% when loopback-only user tries to connect from a non-local host
%% when user tries to access a vhost it has no permissions for
handle_exception(State = #v1{connection = #connection{protocol = Protocol,
                                                      log_name = ConnName,
                                                      user = User},
                             connection_state = opening},
                 Channel, Reason = #amqp_error{name = not_allowed,
                                               explanation = ErrMsg}) ->
    rabbit_log_connection:error(
        "Error on AMQP connection ~p (~s, user: '~s', state: ~p):~n~s~n",
        [self(), ConnName, User#user.username, opening, ErrMsg]),
    send_error_on_channel0_and_close(Channel, Protocol, Reason, State);
handle_exception(State = #v1{connection = #connection{protocol = Protocol},
                             connection_state = CS = opening},
                 Channel, Reason = #amqp_error{}) ->
    respond_and_close(State, Channel, Protocol, Reason,
                      {handshake_error, CS, Reason});
%% when negotiation fails, e.g. due to channel_max being higher than the
%% maximum allowed limit
handle_exception(State = #v1{connection = #connection{protocol = Protocol,
                                                      log_name = ConnName,
                                                      user = User},
                             connection_state = tuning},
                 Channel, Reason = #amqp_error{name = not_allowed,
                                               explanation = ErrMsg}) ->
    rabbit_log_connection:error(
        "Error on AMQP connection ~p (~s,"
        " user: '~s', state: ~p):~n~s~n",
        [self(), ConnName, User#user.username, tuning, ErrMsg]),
    send_error_on_channel0_and_close(Channel, Protocol, Reason, State);
handle_exception(State, Channel, Reason) ->
    %% We don't trust the client at this point - force them to wait
    %% for a bit so they can't DOS us with repeated failed logins etc.
    timer:sleep(?SILENT_CLOSE_DELAY * 1000),
    throw({handshake_error, State#v1.connection_state, Channel, Reason}).
+
%% we've "lost sync" with the client and hence must not accept any
%% more input
-spec fatal_frame_error(_, _, _, _, _) -> no_return().
fatal_frame_error(Error, Type, Channel, Payload, State) ->
    %% Report the frame error to the client first ...
    frame_error(Error, Type, Channel, Payload, State),
    %% grace period to allow transmission of error
    timer:sleep(?SILENT_CLOSE_DELAY * 1000),
    %% ... then abandon the connection entirely.
    throw(fatal_frame_error).
+
%% Raise a frame_error connection exception, including a bounded
%% snippet of the offending payload for diagnostics.
frame_error(Error, Type, Channel, Payload, State) ->
    {Str, Bin} = payload_snippet(Payload),
    handle_exception(State, Channel,
                     rabbit_misc:amqp_error(frame_error,
                                            "type ~p, ~s octets = ~p: ~p",
                                            [Type, Str, Bin, Error], none)).
+
%% Raise an unexpected_frame connection exception (valid frame, wrong
%% time/place), including a bounded payload snippet.
unexpected_frame(Type, Channel, Payload, State) ->
    {Str, Bin} = payload_snippet(Payload),
    handle_exception(State, Channel,
                     rabbit_misc:amqp_error(unexpected_frame,
                                            "type ~p, ~s octets = ~p",
                                            [Type, Str, Bin], none)).
+
%% Return a labelled snippet of a frame payload for error messages:
%% the whole payload when it is at most 16 bytes, otherwise only its
%% first 16 bytes. Uses byte_size/1 (guard-safe and explicit) instead
%% of the ambiguous size/1 BIF.
payload_snippet(Payload) when byte_size(Payload) =< 16 ->
    {"all", Payload};
payload_snippet(<<Snippet:16/binary, _/binary>>) ->
    {"first 16", Snippet}.
+
+%%--------------------------------------------------------------------------
+
%% Start a new channel process for channel number Channel, enforcing
%% both the negotiated per-connection channel_max and the per-user
%% channel limit. On success the channel is monitored and registered in
%% the process dictionary; returns {ok, {ChPid, AState}, NewState} or
%% {error, #amqp_error{}}.
create_channel(_Channel,
               #v1{channel_count = ChannelCount,
                   connection = #connection{channel_max = ChannelMax}})
  when ChannelMax /= 0 andalso ChannelCount >= ChannelMax ->
    %% channel_max of 0 means "unlimited".
    {error, rabbit_misc:amqp_error(
              not_allowed, "number of channels opened (~w) has reached the "
                           "negotiated channel_max (~w)",
              [ChannelCount, ChannelMax], 'none')};
create_channel(Channel,
               #v1{sock = Sock,
                   queue_collector = Collector,
                   channel_sup_sup_pid = ChanSupSup,
                   channel_count = ChannelCount,
                   connection =
                     #connection{name = Name,
                                 protocol = Protocol,
                                 frame_max = FrameMax,
                                 vhost = VHost,
                                 capabilities = Capabilities,
                                 user = #user{username = Username} = User}
                  } = State) ->
    case rabbit_auth_backend_internal:is_over_channel_limit(Username) of
        false ->
            {ok, _ChSupPid, {ChPid, AState}} =
                rabbit_channel_sup_sup:start_channel(
                  ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), Name,
                               Protocol, User, VHost, Capabilities,
                               Collector}),
            MRef = erlang:monitor(process, ChPid),
            %% Bidirectional lookup in the process dictionary:
            %% pid -> {number, monitor}, number -> {pid, assembler state}.
            put({ch_pid, ChPid}, {Channel, MRef}),
            put({channel, Channel}, {ChPid, AState}),
            {ok, {ChPid, AState}, State#v1{channel_count = ChannelCount + 1}};
        {true, Limit} ->
            {error, rabbit_misc:amqp_error(not_allowed,
                        "number of channels opened for user '~s' has reached "
                        "the maximum allowed user limit of (~w)",
                        [Username, Limit], 'none')}
    end.
+
%% Remove a channel's process-dictionary entries ({ch_pid, Pid} and
%% {channel, N}), demonitor it, and tell credit_flow the peer is gone.
%% Returns {ChannelNumber | undefined, NewState}; 'undefined' means the
%% pid was not a known channel.
channel_cleanup(ChPid, State = #v1{channel_count = ChannelCount}) ->
    case get({ch_pid, ChPid}) of
        undefined       -> {undefined, State};
        {Channel, MRef} -> credit_flow:peer_down(ChPid),
                           erase({channel, Channel}),
                           erase({ch_pid, ChPid}),
                           erlang:demonitor(MRef, [flush]),
                           {Channel, State#v1{channel_count = ChannelCount - 1}}
    end.
+
%% List the pids of all channels belonging to this connection, read
%% from the {ch_pid, Pid} entries the reader keeps in its process
%% dictionary.
all_channels() ->
    lists:filtermap(fun({{ch_pid, ChPid}, _MRef}) -> {true, ChPid};
                       (_Other)                   -> false
                    end, get()).
+
%% Run channel_cleanup/2 for every live channel, discarding the
%% intermediate states: used when the whole connection is going away.
clean_up_all_channels(State) ->
    lists:foreach(fun(ChPid) -> channel_cleanup(ChPid, State) end,
                  all_channels()).
+
+%%--------------------------------------------------------------------------
+
%% Route one complete frame. Channel 0 carries connection-level
%% methods and heartbeats; any other channel number is a channel frame
%% handled via process_frame/3. While stopping, only channel-0 methods
%% are still interpreted; everything else is dropped.
handle_frame(Type, 0, Payload,
             State = #v1{connection = #connection{protocol = Protocol}})
  when ?IS_STOPPING(State) ->
    case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of
        {method, MethodName, FieldsBin} ->
            handle_method0(MethodName, FieldsBin, State);
        _Other -> State
    end;
handle_frame(Type, 0, Payload,
             State = #v1{connection = #connection{protocol = Protocol}}) ->
    case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of
        error     -> frame_error(unknown_frame, Type, 0, Payload, State);
        heartbeat -> State;
        {method, MethodName, FieldsBin} ->
            handle_method0(MethodName, FieldsBin, State);
        _Other    -> unexpected_frame(Type, 0, Payload, State)
    end;
handle_frame(Type, Channel, Payload,
             State = #v1{connection = #connection{protocol = Protocol}})
  when ?IS_RUNNING(State) ->
    case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of
        error     -> frame_error(unknown_frame, Type, Channel, Payload, State);
        %% heartbeats are only valid on channel 0
        heartbeat -> unexpected_frame(Type, Channel, Payload, State);
        Frame     -> process_frame(Frame, Channel, State)
    end;
handle_frame(_Type, _Channel, _Payload, State) when ?IS_STOPPING(State) ->
    State;
handle_frame(Type, Channel, Payload, State) ->
    unexpected_frame(Type, Channel, Payload, State).
+
%% Feed a frame to its channel's command assembler, creating the
%% channel on first use. Completed methods are forwarded to the channel
%% process; methods with content go through the flow-controlled path.
process_frame(Frame, Channel, State) ->
    ChKey = {channel, Channel},
    case (case get(ChKey) of
              undefined -> create_channel(Channel, State);
              Other     -> {ok, Other, State}
          end) of
        {error, Error} ->
            handle_exception(State, Channel, Error);
        {ok, {ChPid, AState}, State1} ->
            case rabbit_command_assembler:process(Frame, AState) of
                {ok, NewAState} ->
                    %% Partial method/content: just remember progress.
                    put(ChKey, {ChPid, NewAState}),
                    post_process_frame(Frame, ChPid, State1);
                {ok, Method, NewAState} ->
                    rabbit_channel:do(ChPid, Method),
                    put(ChKey, {ChPid, NewAState}),
                    post_process_frame(Frame, ChPid, State1);
                {ok, Method, Content, NewAState} ->
                    %% Content-carrying method: use the credit-flow
                    %% variant and re-evaluate throttling.
                    rabbit_channel:do_flow(ChPid, Method, Content),
                    put(ChKey, {ChPid, NewAState}),
                    post_process_frame(Frame, ChPid, control_throttle(State1));
                {error, Reason} ->
                    handle_exception(State1, Channel, Reason)
            end
    end.
+
%% Frame-specific bookkeeping after assembly: channel.close_ok cleans
%% up the channel; content frames mark the connection as having
%% received a publish (for stats/throttling); anything else is a no-op.
post_process_frame({method, 'channel.close_ok', _}, ChPid, State) ->
    {_, State1} = channel_cleanup(ChPid, State),
    %% This is not strictly necessary, but more obviously
    %% correct. Also note that we do not need to call maybe_close/1
    %% since we cannot possibly be in the 'closing' state.
    control_throttle(State1);
post_process_frame({content_header, _, _, _, _}, _ChPid, State) ->
    publish_received(State);
post_process_frame({content_body, _}, _ChPid, State) ->
    publish_received(State);
post_process_frame(_Frame, _ChPid, State) ->
    State.
+
+%%--------------------------------------------------------------------------
+
%% We allow clients to exceed the frame size a little bit since quite
%% a few get it wrong - off-by 1 or 8 (empty frame size) are typical.
-define(FRAME_SIZE_FUDGE, ?EMPTY_FRAME_SIZE).

%% Parser callbacks, driven by recvloop/4. Each clause consumes the
%% bytes for the current callback and returns {UnconsumedRest, State},
%% switching the callback for whatever must be parsed next.
handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32, _/binary>>,
             State = #v1{connection = #connection{frame_max = FrameMax}})
  when FrameMax /= 0 andalso
       PayloadSize > FrameMax - ?EMPTY_FRAME_SIZE + ?FRAME_SIZE_FUDGE ->
    %% Oversized frame: we can no longer trust our position in the
    %% byte stream, so this is fatal.
    fatal_frame_error(
      {frame_too_large, PayloadSize, FrameMax - ?EMPTY_FRAME_SIZE},
      Type, Channel, <<>>, State);
handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32,
                             Payload:PayloadSize/binary, ?FRAME_END,
                             Rest/binary>>,
             State) ->
    %% Fast path: header, payload and end marker all already buffered.
    {Rest, ensure_stats_timer(handle_frame(Type, Channel, Payload, State))};
handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32, Rest/binary>>,
             State) ->
    %% Payload not fully buffered yet: wait for PayloadSize bytes plus
    %% the one-byte frame-end marker.
    {Rest, ensure_stats_timer(
             switch_callback(State,
                             {frame_payload, Type, Channel, PayloadSize},
                             PayloadSize + 1))};
handle_input({frame_payload, Type, Channel, PayloadSize}, Data, State) ->
    <<Payload:PayloadSize/binary, EndMarker, Rest/binary>> = Data,
    case EndMarker of
        ?FRAME_END -> State1 = handle_frame(Type, Channel, Payload, State),
                      {Rest, switch_callback(State1, frame_header, 7)};
        _          -> fatal_frame_error({invalid_frame_end_marker, EndMarker},
                                        Type, Channel, Payload, State)
    end;
handle_input(handshake, <<"AMQP", A, B, C, D, Rest/binary>>, State) ->
    %% 8-byte protocol header; the four trailing bytes select a version.
    {Rest, handshake({A, B, C, D}, State)};
handle_input(handshake, <<Other:8/binary, _/binary>>, #v1{sock = Sock}) ->
    refuse_connection(Sock, {bad_header, Other});
handle_input(Callback, Data, _State) ->
    throw({bad_input, Callback, Data}).
+
%% The two rules pertaining to version negotiation:
%%
%% * If the server cannot support the protocol specified in the
%% protocol header, it MUST respond with a valid protocol header and
%% then close the socket connection.
%%
%% * The server MUST provide a protocol version that is lower than or
%% equal to that requested by the client in the protocol header.
%%
%% Each clause below matches the four version bytes that followed the
%% "AMQP" prefix of the protocol header.
handshake({0, 0, 9, 1}, State) ->
    start_connection({0, 9, 1}, rabbit_framing_amqp_0_9_1, State);

%% This is the protocol header for 0-9, which we can safely treat as
%% though it were 0-9-1.
handshake({1, 1, 0, 9}, State) ->
    start_connection({0, 9, 0}, rabbit_framing_amqp_0_9_1, State);

%% This is what most clients send for 0-8. The 0-8 spec, confusingly,
%% defines the version as 8-0.
handshake({1, 1, 8, 0}, State) ->
    start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State);

%% The 0-8 spec as on the AMQP web site actually has this as the
%% protocol header; some libraries e.g., py-amqplib, send it when they
%% want 0-8.
handshake({1, 1, 9, 1}, State) ->
    start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State);

%% ... and finally, the 1.0 spec is crystal clear!
handshake({Id, 1, 0, 0}, State) ->
    become_1_0(Id, State);

handshake(Vsn, #v1{sock = Sock}) ->
    refuse_connection(Sock, {bad_version, Vsn}).
+
%% Offer a protocol version to the client. Connection.start only
%% includes a major and minor version number, Luckily 0-9 and 0-9-1
%% are similar enough that clients will be happy with either.
start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision},
                 Protocol,
                 State = #v1{sock = Sock, connection = Connection}) ->
    rabbit_networking:register_connection(self()),
    Start = #'connection.start'{
               version_major = ProtocolMajor,
               version_minor = ProtocolMinor,
               server_properties = server_properties(Protocol),
               mechanisms = auth_mechanisms_binary(Sock),
               locales = <<"en_US">> },
    ok = send_on_channel0(Sock, Start, Protocol),
    %% Enter the 'starting' state and begin parsing AMQP frames (a
    %% frame header is 7 bytes: type, channel, payload size).
    switch_callback(State#v1{connection = Connection#connection{
                                            timeout_sec = ?NORMAL_TIMEOUT,
                                            protocol = Protocol},
                             connection_state = starting},
                    frame_header, 7).
+
-spec refuse_connection(_, _, _) -> no_return().
%% Reject the handshake: send the protocol header {A,B,C,D} we do
%% support (as the spec requires) and throw the given exception.
refuse_connection(Sock, Exception, {A, B, C, D}) ->
    ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",A,B,C,D>>) end),
    throw(Exception).
+
-spec refuse_connection(rabbit_net:socket(), any()) -> no_return().

%% Convenience wrapper: refuse with our preferred version, 0-9-1.
refuse_connection(Sock, Exception) ->
    refuse_connection(Sock, Exception, {0, 0, 9, 1}).
+
%% (Re)arm the periodic emit_stats timer, but only once the connection
%% is fully running; in all other states leave the state untouched.
ensure_stats_timer(State = #v1{connection_state = running}) ->
    rabbit_event:ensure_stats_timer(State, #v1.stats_timer, emit_stats);
ensure_stats_timer(State) ->
    State.
+
+%%--------------------------------------------------------------------------
+
%% Decode a channel-0 method's fields and dispatch to handle_method0/2,
%% converting failures into connection exceptions. A closed/enotconn
%% socket error while handling the method is reported as an abrupt
%% client close.
handle_method0(MethodName, FieldsBin,
               State = #v1{connection = #connection{protocol = Protocol}}) ->
    try
        handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin),
                       State)
    catch throw:{inet_error, E} when E =:= closed; E =:= enotconn ->
            maybe_emit_stats(State),
            throw({connection_closed_abruptly, State});
          exit:#amqp_error{method = none} = Reason ->
            %% Tag the error with the method that triggered it.
            handle_exception(State, 0, Reason#amqp_error{method = MethodName});
          Type:Reason:Stacktrace ->
            handle_exception(State, 0, {Type, Reason, MethodName, Stacktrace})
    end.
+
%% Handle a decoded channel-0 method according to the connection state
%% machine: start_ok -> securing -> (secure_ok)* -> tuning -> tune_ok
%% -> opening -> open -> running; plus close/close_ok/update_secret.
handle_method0(#'connection.start_ok'{mechanism = Mechanism,
                                      response = Response,
                                      client_properties = ClientProperties},
               State0 = #v1{connection_state = starting,
                            connection = Connection0,
                            sock = Sock}) ->
    AuthMechanism = auth_mechanism_to_module(Mechanism, Sock),
    Capabilities =
        case rabbit_misc:table_lookup(ClientProperties, <<"capabilities">>) of
            {table, Capabilities1} -> Capabilities1;
            _                      -> []
        end,
    Connection1 = Connection0#connection{
                    client_properties = ClientProperties,
                    capabilities = Capabilities,
                    auth_mechanism = {Mechanism, AuthMechanism},
                    auth_state = AuthMechanism:init(Sock)},
    Connection2 = augment_connection_log_name(Connection1),
    State = State0#v1{connection_state = securing,
                      connection = Connection2},
    % adding client properties to process dictionary to send them later
    % in the connection_closed event
    put(client_properties, ClientProperties),
    case user_provided_connection_name(Connection2) of
        undefined ->
            undefined;
        UserProvidedConnectionName ->
            put(connection_user_provided_name, UserProvidedConnectionName)
    end,
    auth_phase(Response, State);

handle_method0(#'connection.secure_ok'{response = Response},
               State = #v1{connection_state = securing}) ->
    %% Next step of a challenge-response authentication mechanism.
    auth_phase(Response, State);

handle_method0(#'connection.tune_ok'{frame_max   = FrameMax,
                                     channel_max = ChannelMax,
                                     heartbeat   = ClientHeartbeat},
               State = #v1{connection_state = tuning,
                           connection = Connection,
                           helper_sup = SupPid,
                           sock = Sock}) ->
    %% Validate the client's counter-proposal, then start the queue
    %% collector and the heartbeaters.
    ok = validate_negotiated_integer_value(
           frame_max,   ?FRAME_MIN_SIZE, FrameMax),
    ok = validate_negotiated_integer_value(
           channel_max, ?CHANNEL_MIN,    ChannelMax),
    {ok, Collector} = rabbit_connection_helper_sup:start_queue_collector(
                        SupPid, Connection#connection.name),
    Frame = rabbit_binary_generator:build_heartbeat_frame(),
    Parent = self(),
    SendFun =
        fun() ->
                case catch rabbit_net:send(Sock, Frame) of
                    ok ->
                        ok;
                    {error, Reason} ->
                        Parent ! {heartbeat_send_error, Reason};
                    Unexpected ->
                        Parent ! {heartbeat_send_error, Unexpected}
                end,
                ok
        end,
    ReceiveFun = fun() -> Parent ! heartbeat_timeout end,
    Heartbeater = rabbit_heartbeat:start(
                    SupPid, Sock, Connection#connection.name,
                    ClientHeartbeat, SendFun, ClientHeartbeat, ReceiveFun),
    State#v1{connection_state = opening,
             connection = Connection#connection{
                            frame_max   = FrameMax,
                            channel_max = ChannelMax,
                            timeout_sec = ClientHeartbeat},
             queue_collector = Collector,
             heartbeater = Heartbeater};

handle_method0(#'connection.open'{virtual_host = VHost},
               State = #v1{connection_state = opening,
                           connection = Connection = #connection{
                             log_name = ConnName,
                             user = User = #user{username = Username},
                             protocol = Protocol},
                           helper_sup = SupPid,
                           sock = Sock,
                           throttle = Throttle}) ->
    %% Enforce connection limits and vhost access before opening.
    ok = is_over_vhost_connection_limit(VHost, User),
    ok = is_over_user_connection_limit(User),
    ok = rabbit_access_control:check_vhost_access(User, VHost, {socket, Sock}, #{}),
    ok = is_vhost_alive(VHost, User),
    NewConnection = Connection#connection{vhost = VHost},
    ok = send_on_channel0(Sock, #'connection.open_ok'{}, Protocol),

    %% Pick up any resource alarms already in effect so the new
    %% connection starts blocked if necessary.
    Alarms = rabbit_alarm:register(self(), {?MODULE, conserve_resources, []}),
    BlockedBy = sets:from_list([{resource, Alarm} || Alarm <- Alarms]),
    Throttle1 = Throttle#throttle{blocked_by = BlockedBy},

    {ok, ChannelSupSupPid} =
        rabbit_connection_helper_sup:start_channel_sup_sup(SupPid),
    State1 = control_throttle(
               State#v1{connection_state    = running,
                        connection          = NewConnection,
                        channel_sup_sup_pid = ChannelSupSupPid,
                        throttle            = Throttle1}),
    Infos = augment_infos_with_user_provided_connection_name(
              [{type, network} | infos(?CREATION_EVENT_KEYS, State1)],
              State1
             ),
    rabbit_core_metrics:connection_created(proplists:get_value(pid, Infos),
                                           Infos),
    rabbit_event:notify(connection_created, Infos),
    maybe_emit_stats(State1),
    rabbit_log_connection:info(
        "connection ~p (~s): "
        "user '~s' authenticated and granted access to vhost '~s'~n",
        [self(), dynamic_connection_name(ConnName), Username, VHost]),
    State1;
handle_method0(#'connection.close'{}, State) when ?IS_RUNNING(State) ->
    lists:foreach(fun rabbit_channel:shutdown/1, all_channels()),
    maybe_close(State#v1{connection_state = closing});
handle_method0(#'connection.close'{},
               State = #v1{connection = #connection{protocol = Protocol},
                           sock = Sock})
  when ?IS_STOPPING(State) ->
    %% We're already closed or closing, so we don't need to cleanup
    %% anything.
    ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol),
    State;
handle_method0(#'connection.close_ok'{},
               State = #v1{connection_state = closed}) ->
    self() ! terminate_connection,
    State;
handle_method0(#'connection.update_secret'{new_secret = NewSecret, reason = Reason},
               State = #v1{connection =
                               #connection{protocol = Protocol,
                                           user = User = #user{username = Username},
                                           log_name = ConnName} = Conn,
                           sock = Sock}) when ?IS_RUNNING(State) ->
    rabbit_log_connection:debug(
        "connection ~p (~s) of user '~s': "
        "asked to update secret, reason: ~s~n",
        [self(), dynamic_connection_name(ConnName), Username, Reason]),
    case rabbit_access_control:update_state(User, NewSecret) of
      {ok, User1} ->
        %% User/auth backend state has been updated. Now we can propagate it to channels
        %% asynchronously and return. All the channels have to do is to update their
        %% own state.
        %%
        %% Any secret update errors coming from the authz backend will be handled in the other branch.
        %% Therefore we optimistically do no error handling here. MK.
        lists:foreach(fun(Ch) ->
          rabbit_log:debug("Updating user/auth backend state for channel ~p", [Ch]),
          _ = rabbit_channel:update_user_state(Ch, User1)
        end, all_channels()),
        ok = send_on_channel0(Sock, #'connection.update_secret_ok'{}, Protocol),
        rabbit_log_connection:info(
            "connection ~p (~s): "
            "user '~s' updated secret, reason: ~s~n",
            [self(), dynamic_connection_name(ConnName), Username, Reason]),
        State#v1{connection = Conn#connection{user = User1}};
      {refused, Message} ->
        rabbit_log_connection:error("Secret update was refused for user '~p': ~p",
                                    [Username, Message]),
        rabbit_misc:protocol_error(not_allowed, "New secret was refused by one of the backends", []);
      {error, Message} ->
        rabbit_log_connection:error("Secret update for user '~p' failed: ~p",
                                    [Username, Message]),
        rabbit_misc:protocol_error(not_allowed,
                                   "Secret update failed", [])
    end;
handle_method0(_Method, State) when ?IS_STOPPING(State) ->
    State;
handle_method0(_Method, #v1{connection_state = S}) ->
    rabbit_misc:protocol_error(
      channel_error, "unexpected method in connection state ~w", [S]).
+
+%% Fail with internal_error unless the vhost's supervision tree is
+%% running on this node; a stopped or crashed vhost cannot accept
+%% connections.
+is_vhost_alive(VHostPath, User) ->
+    case rabbit_vhost_sup_sup:is_vhost_alive(VHostPath) of
+        true -> ok;
+        false ->
+            rabbit_misc:protocol_error(internal_error,
+                            "access to vhost '~s' refused for user '~s': "
+                            "vhost '~s' is down",
+                            [VHostPath, User#user.username, VHostPath])
+    end.
+
+%% Fail with not_allowed when the per-vhost connection limit is reached.
+%% A concurrently-deleted vhost surfaces as a no_such_vhost throw, which
+%% is also reported to the client as not_allowed.
+is_over_vhost_connection_limit(VHostPath, User) ->
+    try rabbit_vhost_limit:is_over_connection_limit(VHostPath) of
+        false -> ok;
+        {true, Limit} -> rabbit_misc:protocol_error(not_allowed,
+                            "access to vhost '~s' refused for user '~s': "
+                            "connection limit (~p) is reached",
+                            [VHostPath, User#user.username, Limit])
+    catch
+        throw:{error, {no_such_vhost, VHostPath}} ->
+            rabbit_misc:protocol_error(not_allowed, "vhost ~s not found", [VHostPath])
+    end.
+
+%% Fail with not_allowed when the per-user connection limit (tracked by
+%% the internal auth backend) is reached.
+is_over_user_connection_limit(#user{username = Username}) ->
+    case rabbit_auth_backend_internal:is_over_connection_limit(Username) of
+        false -> ok;
+        {true, Limit} -> rabbit_misc:protocol_error(not_allowed,
+                            "Connection refused for user '~s': "
+                            "user connection limit (~p) is reached",
+                            [Username, Limit])
+    end.
+
+%% Validate a value the client proposed during connection.tune
+%% negotiation (e.g. frame_max, channel_max). In AMQP 0-9-1 a value of
+%% 0 means "no limit"; a non-zero client value must be >= Min, and when
+%% the server itself has a non-zero limit the client value must not
+%% exceed it (a client "no limit" of 0 also violates a server limit).
+validate_negotiated_integer_value(Field, Min, ClientValue) ->
+    ServerValue = get_env(Field),
+    if ClientValue /= 0 andalso ClientValue < Min ->
+            fail_negotiation(Field, min, Min, ClientValue);
+       ServerValue /= 0 andalso (ClientValue =:= 0 orelse
+                                 ClientValue > ServerValue) ->
+            fail_negotiation(Field, max, ServerValue, ClientValue);
+       true ->
+            ok
+    end.
+
+%% keep dialyzer happy
+-spec fail_negotiation(atom(), 'min' | 'max', integer(), integer()) ->
+          no_return().
+%% Abort tuning with a 'connection.tune' protocol error describing which
+%% bound (minimum or maximum) the client value violated.
+fail_negotiation(Field, MinOrMax, ServerValue, ClientValue) ->
+    {S1, S2} = case MinOrMax of
+                   min -> {lower, minimum};
+                   max -> {higher, maximum}
+               end,
+    ClientValueDetail = get_client_value_detail(Field, ClientValue),
+    rabbit_misc:protocol_error(
+      not_allowed, "negotiated ~w = ~w~s is ~w than the ~w allowed value (~w)",
+      [Field, ClientValue, ClientValueDetail, S1, S2, ServerValue], 'connection.tune').
+
+%% Read a mandatory key from the 'rabbit' application environment;
+%% crashes (badmatch) if the key is unset.
+get_env(Key) ->
+    {ok, Value} = application:get_env(rabbit, Key),
+    Value.
+
+%% Send a method frame on channel 0, the connection-level channel.
+send_on_channel0(Sock, Method, Protocol) ->
+    ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol).
+
+%% Resolve the SASL mechanism name sent by the client to its callback
+%% module. The mechanism must be registered AND currently offered on
+%% this socket (configured and should_offer/1 true), otherwise the
+%% connection fails with command_invalid.
+auth_mechanism_to_module(TypeBin, Sock) ->
+    case rabbit_registry:binary_to_type(TypeBin) of
+        {error, not_found} ->
+            rabbit_misc:protocol_error(
+              command_invalid, "unknown authentication mechanism '~s'",
+              [TypeBin]);
+        T ->
+            case {lists:member(T, auth_mechanisms(Sock)),
+                  rabbit_registry:lookup_module(auth_mechanism, T)} of
+                {true, {ok, Module}} ->
+                    Module;
+                _ ->
+                    rabbit_misc:protocol_error(
+                      command_invalid,
+                      "invalid authentication mechanism '~s'", [T])
+            end
+    end.
+
+%% Mechanisms that are both enabled in configuration and willing to be
+%% offered on this particular socket (a mechanism's should_offer/1 may
+%% e.g. require TLS).
+auth_mechanisms(Sock) ->
+    {ok, Configured} = application:get_env(auth_mechanisms),
+    [Name || {Name, Module} <- rabbit_registry:lookup_all(auth_mechanism),
+             Module:should_offer(Sock), lists:member(Name, Configured)].
+
+%% Space-separated mechanism list, as advertised in connection.start.
+auth_mechanisms_binary(Sock) ->
+    list_to_binary(
+      string:join([atom_to_list(A) || A <- auth_mechanisms(Sock)], " ")).
+
+%% Feed the client's SASL response into the selected auth mechanism and
+%% act on the outcome:
+%%   refused / protocol_error -> record the failed attempt and abort;
+%%   challenge                -> send connection.secure, keep waiting;
+%%   ok                       -> apply loopback restrictions, record the
+%%                               attempt, send connection.tune and move
+%%                               the connection into the 'tuning' state.
+auth_phase(Response,
+           State = #v1{connection = Connection =
+                           #connection{protocol = Protocol,
+                                       auth_mechanism = {Name, AuthMechanism},
+                                       auth_state = AuthState},
+                       sock = Sock}) ->
+    RemoteAddress = list_to_binary(inet:ntoa(Connection#connection.host)),
+    case AuthMechanism:handle_response(Response, AuthState) of
+        {refused, Username, Msg, Args} ->
+            rabbit_core_metrics:auth_attempt_failed(RemoteAddress, Username, amqp091),
+            auth_fail(Username, Msg, Args, Name, State);
+        {protocol_error, Msg, Args} ->
+            rabbit_core_metrics:auth_attempt_failed(RemoteAddress, <<>>, amqp091),
+            notify_auth_result(none, user_authentication_failure,
+                               [{error, rabbit_misc:format(Msg, Args)}],
+                               State),
+            rabbit_misc:protocol_error(syntax_error, Msg, Args);
+        {challenge, Challenge, AuthState1} ->
+            %% no username known yet, so the metric is recorded with <<>>
+            rabbit_core_metrics:auth_attempt_succeeded(RemoteAddress, <<>>, amqp091),
+            Secure = #'connection.secure'{challenge = Challenge},
+            ok = send_on_channel0(Sock, Secure, Protocol),
+            State#v1{connection = Connection#connection{
+                                    auth_state = AuthState1}};
+        {ok, User = #user{username = Username}} ->
+            case rabbit_access_control:check_user_loopback(Username, Sock) of
+                ok ->
+                    rabbit_core_metrics:auth_attempt_succeeded(RemoteAddress, Username, amqp091),
+                    notify_auth_result(Username, user_authentication_success,
+                                       [], State);
+                not_allowed ->
+                    rabbit_core_metrics:auth_attempt_failed(RemoteAddress, Username, amqp091),
+                    auth_fail(Username, "user '~s' can only connect via "
+                              "localhost", [Username], Name, State)
+            end,
+            %% negotiation limits come from the rabbit app environment
+            Tune = #'connection.tune'{frame_max = get_env(frame_max),
+                                      channel_max = get_env(channel_max),
+                                      heartbeat = get_env(heartbeat)},
+            ok = send_on_channel0(Sock, Tune, Protocol),
+            State#v1{connection_state = tuning,
+                     connection = Connection#connection{user = User,
+                                                        auth_state = none}}
+    end.
+
+-spec auth_fail
+        (rabbit_types:username() | none, string(), [any()], binary(), #v1{}) ->
+            no_return().
+
+%% Record an authentication failure and terminate the connection with
+%% access_refused. If the client advertised the
+%% authentication_failure_close capability, a connection.close carrying
+%% a sanitised explanation (no credential details) is sent first.
+auth_fail(Username, Msg, Args, AuthName,
+          State = #v1{connection = #connection{protocol = Protocol,
+                                               capabilities = Capabilities}}) ->
+    notify_auth_result(Username, user_authentication_failure,
+                       [{error, rabbit_misc:format(Msg, Args)}], State),
+    AmqpError = rabbit_misc:amqp_error(
+                  access_refused, "~s login refused: ~s",
+                  [AuthName, io_lib:format(Msg, Args)], none),
+    case rabbit_misc:table_lookup(Capabilities,
+                                  <<"authentication_failure_close">>) of
+        {bool, true} ->
+            SafeMsg = io_lib:format(
+                        "Login was refused using authentication "
+                        "mechanism ~s. For details see the broker "
+                        "logfile.", [AuthName]),
+            AmqpError1 = AmqpError#amqp_error{explanation = SafeMsg},
+            {0, CloseMethod} = rabbit_binary_generator:map_exception(
+                                 0, AmqpError1, Protocol),
+            ok = send_on_channel0(State#v1.sock, CloseMethod, Protocol);
+        _ -> ok
+    end,
+    rabbit_misc:protocol_error(AmqpError).
+
+%% Emit a rabbit_event about an authentication attempt. Empty-valued
+%% properties ('') are dropped from the event before it is published.
+notify_auth_result(Username, AuthResult, ExtraProps, State) ->
+    EventProps = [{connection_type, network},
+                  {name, case Username of none -> ''; _ -> Username end}] ++
+                 [case Item of
+                      name -> {connection_name, i(name, State)};
+                      _ -> {Item, i(Item, State)}
+                  end || Item <- ?AUTH_NOTIFICATION_INFO_KEYS] ++
+                 ExtraProps,
+    rabbit_event:notify(AuthResult, [P || {_, V} = P <- EventProps, V =/= '']).
+
+%%--------------------------------------------------------------------------
+
+%% Collect the requested info items for this connection as a proplist,
+%% querying i/2 once per requested key.
+infos(Keys, State) ->
+    lists:map(fun (Key) -> {Key, i(Key, State)} end, Keys).
+
+%% i/2: look up a single connection info item. Process-level and
+%% socket-level items are handled here; anything else is delegated to
+%% ic/2, which reads from the #connection{} record.
+i(pid, #v1{}) -> self();
+i(node, #v1{}) -> node();
+i(SockStat, S) when SockStat =:= recv_oct;
+                    SockStat =:= recv_cnt;
+                    SockStat =:= send_oct;
+                    SockStat =:= send_cnt;
+                    SockStat =:= send_pend ->
+    socket_info(fun (Sock) -> rabbit_net:getstat(Sock, [SockStat]) end,
+                fun ([{_, I}]) -> I end, S);
+i(ssl, #v1{sock = Sock}) -> rabbit_net:is_ssl(Sock);
+i(ssl_protocol, S) -> ssl_info(fun ({P, _}) -> P end, S);
+i(ssl_key_exchange, S) -> ssl_info(fun ({_, {K, _, _}}) -> K end, S);
+i(ssl_cipher, S) -> ssl_info(fun ({_, {_, C, _}}) -> C end, S);
+i(ssl_hash, S) -> ssl_info(fun ({_, {_, _, H}}) -> H end, S);
+i(peer_cert_issuer, S) -> cert_info(fun rabbit_ssl:peer_cert_issuer/1, S);
+i(peer_cert_subject, S) -> cert_info(fun rabbit_ssl:peer_cert_subject/1, S);
+i(peer_cert_validity, S) -> cert_info(fun rabbit_ssl:peer_cert_validity/1, S);
+i(channels, #v1{channel_count = ChannelCount}) -> ChannelCount;
+%% The reported 'state' folds in throttling: a connection blocked (or
+%% recently blocked) only by credit flow reports 'flow' rather than
+%% 'blocked'.
+i(state, #v1{connection_state = ConnectionState,
+             throttle = #throttle{blocked_by = Reasons,
+                                  last_blocked_at = T} = Throttle}) ->
+    %% not throttled by resource or other longer-term reasons
+    %% TODO: come up with a sensible function name
+    case sets:size(sets:del_element(flow, Reasons)) =:= 0 andalso
+        (credit_flow:blocked() %% throttled by flow now
+         orelse %% throttled by flow recently
+           (is_blocked_by_flow(Throttle) andalso T =/= never andalso
+            erlang:convert_time_unit(erlang:monotonic_time() - T,
+                                     native,
+                                     micro_seconds) < 5000000)) of
+        true  -> flow;
+        false ->
+            case {has_reasons_to_block(Throttle), ConnectionState} of
+                %% blocked
+                {_, blocked} -> blocked;
+                %% not yet blocked (there were no publishes)
+                {true, running} -> blocking;
+                %% not blocked
+                {false, _} -> ConnectionState;
+                %% catch all to be defensive
+                _ -> ConnectionState
+            end
+    end;
+i(garbage_collection, _State) ->
+    rabbit_misc:get_gc_info(self());
+i(reductions, _State) ->
+    {reductions, Reductions} = erlang:process_info(self(), reductions),
+    Reductions;
+i(Item, #v1{connection = Conn}) -> ic(Item, Conn).
+
+%% ic/2: info items read directly from the #connection{} record.
+%% Unknown items raise {bad_argument, Item}.
+ic(name, #connection{name = Name}) -> Name;
+ic(host, #connection{host = Host}) -> Host;
+ic(peer_host, #connection{peer_host = PeerHost}) -> PeerHost;
+ic(port, #connection{port = Port}) -> Port;
+ic(peer_port, #connection{peer_port = PeerPort}) -> PeerPort;
+ic(protocol, #connection{protocol = none}) -> none;
+ic(protocol, #connection{protocol = P}) -> P:version();
+ic(user, #connection{user = none}) -> '';
+ic(user, #connection{user = U}) -> U#user.username;
+ic(user_who_performed_action, C) -> ic(user, C);
+ic(vhost, #connection{vhost = VHost}) -> VHost;
+ic(timeout, #connection{timeout_sec = Timeout}) -> Timeout;
+ic(frame_max, #connection{frame_max = FrameMax}) -> FrameMax;
+ic(channel_max, #connection{channel_max = ChMax}) -> ChMax;
+ic(client_properties, #connection{client_properties = CP}) -> CP;
+ic(auth_mechanism, #connection{auth_mechanism = none}) -> none;
+ic(auth_mechanism, #connection{auth_mechanism = {Name, _Mod}}) -> Name;
+ic(connected_at, #connection{connected_at = T}) -> T;
+ic(Item, #connection{}) -> throw({bad_argument, Item}).
+
+%% Fetch a socket statistic via Get, project it via Select, and degrade
+%% to 0 on any error or non-numeric result (stats must never crash the
+%% reader).
+socket_info(Get, Select, #v1{sock = Sock}) ->
+    case Get(Sock) of
+        {ok, T} -> case Select(T) of
+                       N when is_number(N) -> N;
+                       _ -> 0
+                   end;
+        {error, _} -> 0
+    end.
+
+%% Project a TLS session property via F; returns '' for plain TCP
+%% sockets or on error. F receives {Protocol, {KeyExchange, Cipher, Mac}}.
+ssl_info(F, #v1{sock = Sock}) ->
+    case rabbit_net:ssl_info(Sock) of
+        nossl       -> '';
+        {error, _}  -> '';
+        {ok, Items} ->
+            P = proplists:get_value(protocol, Items),
+            #{cipher := C,
+              key_exchange := K,
+              mac := H} = proplists:get_value(selected_cipher_suite, Items),
+            F({P, {K, C, H}})
+    end.
+
+%% Project a peer-certificate property via F as a binary; returns ''
+%% for plain TCP sockets or when no certificate is available.
+cert_info(F, #v1{sock = Sock}) ->
+    case rabbit_net:peercert(Sock) of
+        nossl      -> '';
+        {error, _} -> '';
+        {ok, Cert} -> list_to_binary(F(Cert))
+    end.
+
+%% Emit connection stats only if the stats timer says stats emission is
+%% currently enabled for this connection.
+maybe_emit_stats(State) ->
+    rabbit_event:if_enabled(State, #v1.stats_timer,
+                            fun() -> emit_stats(State) end).
+
+%% Publish the simple (per-connection counters) and other metrics to
+%% rabbit_core_metrics and rabbit_event, then reset and re-arm the
+%% stats timer.
+emit_stats(State) ->
+    [{_, Pid}, {_, Recv_oct}, {_, Send_oct}, {_, Reductions}] = I
+	= infos(?SIMPLE_METRICS, State),
+    Infos = infos(?OTHER_METRICS, State),
+    rabbit_core_metrics:connection_stats(Pid, Infos),
+    rabbit_core_metrics:connection_stats(Pid, Recv_oct, Send_oct, Reductions),
+    rabbit_event:notify(connection_stats, Infos ++ I),
+    State1 = rabbit_event:reset_stats_timer(State, #v1.stats_timer),
+    ensure_stats_timer(State1).
+
+%% 1.0 stub
+-spec become_1_0(non_neg_integer(), #v1{}) -> no_return().
+
+%% Hand this connection over to the AMQP 1.0 plugin's reader. Protocol
+%% id 0 selects plain AMQP 1.0, 3 selects its SASL variant; anything
+%% else (or a missing plugin) refuses the connection.
+become_1_0(Id, State = #v1{sock = Sock}) ->
+    case code:is_loaded(rabbit_amqp1_0_reader) of
+        false -> refuse_connection(Sock, amqp1_0_plugin_not_enabled);
+        _     -> Mode = case Id of
+                            0 -> amqp;
+                            3 -> sasl;
+                            _ -> refuse_connection(
+                                   Sock, {unsupported_amqp1_0_protocol_id, Id},
+                                   {3, 1, 0, 0})
+                        end,
+                 F = fun (_Deb, Buf, BufLen, S) ->
+                             {rabbit_amqp1_0_reader, init,
+                              [Mode, pack_for_1_0(Buf, BufLen, S)]}
+                     end,
+                 State#v1{connection_state = {become, F}}
+    end.
+
+%% Flatten the parts of #v1{} state (plus the unparsed buffer) that the
+%% AMQP 1.0 reader needs into the tuple it expects.
+pack_for_1_0(Buf, BufLen, #v1{parent = Parent,
+                              sock = Sock,
+                              recv_len = RecvLen,
+                              pending_recv = PendingRecv,
+                              helper_sup = SupPid,
+                              proxy_socket = ProxySocket}) ->
+    {Parent, Sock, RecvLen, PendingRecv, SupPid, Buf, BufLen, ProxySocket}.
+
+%% Log a hard error, then send connection.close on channel 0 and tear
+%% the connection down.
+respond_and_close(State, Channel, Protocol, Reason, LogErr) ->
+    log_hard_error(State, Channel, LogErr),
+    send_error_on_channel0_and_close(Channel, Protocol, Reason, State).
+
+%% Terminate all channels, close the connection and send the mapped
+%% connection.close method for Reason on channel 0.
+send_error_on_channel0_and_close(Channel, Protocol, Reason, State) ->
+    {0, CloseMethod} =
+        rabbit_binary_generator:map_exception(Channel, Reason, Protocol),
+    State1 = close_connection(terminate_channels(State)),
+    ok = send_on_channel0(State#v1.sock, CloseMethod, Protocol),
+    State1.
+
+%%
+%% Publisher throttling
+%%
+
+%% Build the human-readable reason binary sent in connection.blocked,
+%% e.g. <<"low on memory & disk">>.
+blocked_by_message(#throttle{blocked_by = Reasons}) ->
+    %% we don't want to report internal flow as a reason here since
+    %% it is entirely transient
+    Reasons1 = sets:del_element(flow, Reasons),
+    RStr = string:join([format_blocked_by(R) || R <- sets:to_list(Reasons1)], " & "),
+    list_to_binary(rabbit_misc:format("low on ~s", [RStr])).
+
+%% Human-readable name of a resource alarm blocking the connection;
+%% both 'disk' and the legacy 'disc' spelling render as "disk".
+format_blocked_by({resource, memory}) ->
+    "memory";
+format_blocked_by({resource, R}) when R =:= disk; R =:= disc ->
+    "disk".
+
+%% Stamp the throttle with the current monotonic time; used by i(state,..)
+%% to decide whether the connection was blocked by flow "recently".
+update_last_blocked_at(Throttle) ->
+    Throttle#throttle{last_blocked_at = erlang:monotonic_time()}.
+
+%% Has a connection.blocked notification already been sent (and not yet
+%% followed by connection.unblocked)?
+connection_blocked_message_sent(
+    #throttle{connection_blocked_message_sent = BS}) -> BS.
+
+%% Send connection.blocked only when we intend to block, there is a
+%% non-flow (i.e. resource alarm) reason, and we have not told the
+%% client already.
+should_send_blocked(Throttle = #throttle{blocked_by = Reasons}) ->
+    should_block(Throttle)
+    andalso
+    sets:size(sets:del_element(flow, Reasons)) =/= 0
+    andalso
+    not connection_blocked_message_sent(Throttle).
+
+%% Send connection.unblocked once all non-flow reasons are gone and the
+%% client was previously told it is blocked.
+should_send_unblocked(Throttle = #throttle{blocked_by = Reasons}) ->
+    connection_blocked_message_sent(Throttle)
+    andalso
+    sets:size(sets:del_element(flow, Reasons)) == 0.
+
+%% Returns true if we have a reason to block
+%% this connection.
+has_reasons_to_block(#throttle{blocked_by = Reasons}) ->
+    sets:size(Reasons) > 0.
+
+%% Is credit flow one of the current blocking reasons?
+is_blocked_by_flow(#throttle{blocked_by = Reasons}) ->
+    sets:is_element(flow, Reasons).
+
+%% The should_block flag is set once a publish arrives while there are
+%% reasons to block (see publish_received/1).
+should_block(#throttle{should_block = Val}) -> Val.
+
+should_block_connection(Throttle) ->
+    should_block(Throttle) andalso has_reasons_to_block(Throttle).
+
+should_unblock_connection(Throttle) ->
+    not should_block_connection(Throttle).
+
+%% Move the connection into the 'blocked' state when warranted: pause
+%% heartbeat monitoring (so a quiet blocked client is not disconnected)
+%% and notify the client if needed.
+maybe_block(State = #v1{connection_state = CS, throttle = Throttle}) ->
+    case should_block_connection(Throttle) of
+        true ->
+            State1 = State#v1{connection_state = blocked,
+                              throttle = update_last_blocked_at(Throttle)},
+            case CS of
+                running ->
+                    ok = rabbit_heartbeat:pause_monitor(State#v1.heartbeater);
+                _ -> ok
+            end,
+            maybe_send_blocked_or_unblocked(State1);
+        false -> State
+    end.
+
+%% Move the connection back to 'running' when no blocking reason
+%% remains: resume heartbeats, clear should_block, and notify the
+%% client if it had been told it was blocked.
+maybe_unblock(State = #v1{throttle = Throttle}) ->
+    case should_unblock_connection(Throttle) of
+        true ->
+            ok = rabbit_heartbeat:resume_monitor(State#v1.heartbeater),
+            State1 = State#v1{connection_state = running,
+                              throttle = Throttle#throttle{should_block = false}},
+            maybe_send_unblocked(State1);
+        false -> State
+    end.
+
+%% Send connection.unblocked if due, recording that the blocked
+%% notification is no longer outstanding.
+maybe_send_unblocked(State = #v1{throttle = Throttle}) ->
+    case should_send_unblocked(Throttle) of
+        true ->
+            ok = send_unblocked(State),
+            State#v1{throttle =
+                Throttle#throttle{connection_blocked_message_sent = false}};
+        false -> State
+    end.
+
+%% Send connection.blocked if due, otherwise fall through to the
+%% unblocked check (the two notifications are mutually exclusive).
+maybe_send_blocked_or_unblocked(State = #v1{throttle = Throttle}) ->
+    case should_send_blocked(Throttle) of
+        true ->
+            ok = send_blocked(State, blocked_by_message(Throttle)),
+            State#v1{throttle =
+                Throttle#throttle{connection_blocked_message_sent = true}};
+        false -> maybe_send_unblocked(State)
+    end.
+
+%% Called when the client publishes: blocking only takes effect on the
+%% first publish while blocking reasons exist, so purely-consuming
+%% connections are never blocked.
+publish_received(State = #v1{throttle = Throttle}) ->
+    case has_reasons_to_block(Throttle) of
+        false -> State;
+        true  ->
+            Throttle1 = Throttle#throttle{should_block = true},
+            maybe_block(State#v1{throttle = Throttle1})
+    end.
+
+%% Re-evaluate throttling: refresh the 'flow' reason from credit_flow
+%% and then transition the connection between running/blocked as
+%% appropriate.
+control_throttle(State = #v1{connection_state = CS,
+                             throttle = #throttle{blocked_by = Reasons} = Throttle}) ->
+    Throttle1 = case credit_flow:blocked() of
+                  true ->
+                      Throttle#throttle{blocked_by = sets:add_element(flow, Reasons)};
+                  false ->
+                      Throttle#throttle{blocked_by = sets:del_element(flow, Reasons)}
+                end,
+    State1 = State#v1{throttle = Throttle1},
+    case CS of
+        running -> maybe_block(State1);
+        %% unblock or re-enable blocking
+        blocked -> maybe_block(maybe_unblock(State1));
+        _       -> State1
+    end.
+
+%% If the client supplied a connection_name in its client properties,
+%% append it to the connection's log name and store it as the process
+%% name for crash/log reports.
+augment_connection_log_name(#connection{name = Name} = Connection) ->
+    case user_provided_connection_name(Connection) of
+        undefined ->
+            Connection;
+        UserSpecifiedName ->
+            LogName = <<Name/binary, " - ", UserSpecifiedName/binary>>,
+            rabbit_log_connection:info("Connection ~p (~s) has a client-provided name: ~s~n", [self(), Name, UserSpecifiedName]),
+            ?store_proc_name(LogName),
+            Connection#connection{log_name = LogName}
+    end.
+
+%% Add the client-provided connection name (if any) to an info
+%% proplist, e.g. for connection_created events.
+augment_infos_with_user_provided_connection_name(Infos, #v1{connection = Connection}) ->
+    case user_provided_connection_name(Connection) of
+     undefined ->
+         Infos;
+     UserProvidedConnectionName ->
+         [{user_provided_name, UserProvidedConnectionName} | Infos]
+    end.
+
+%% Extract the optional "connection_name" longstr from the client
+%% properties table; undefined when absent or of another type.
+user_provided_connection_name(#connection{client_properties = ClientProperties}) ->
+    case rabbit_misc:table_lookup(ClientProperties, <<"connection_name">>) of
+        {longstr, UserSpecifiedName} ->
+            UserSpecifiedName;
+        _ ->
+            undefined
+    end.
+
+%% Prefer the stored process name (which may include the client-provided
+%% name) over the supplied default.
+dynamic_connection_name(Default) ->
+    case rabbit_misc:get_proc_name() of
+        {ok, Name} ->
+            Name;
+        _ ->
+            Default
+    end.
+
+%% Record metrics/events for a channel that terminated without a proper
+%% channel.close handshake.
+handle_uncontrolled_channel_close(ChPid) ->
+    rabbit_core_metrics:channel_closed(ChPid),
+    rabbit_event:notify(channel_closed, [{pid, ChPid}]).
+
+-spec get_client_value_detail(atom(), integer()) -> string().
+%% Extra human-readable detail appended to a client-negotiated value in
+%% error messages: a client channel_max of 0 means "no limit".
+get_client_value_detail(Field, ClientValue) ->
+    case {Field, ClientValue} of
+        {channel_max, 0} -> " (no limit)";
+        {_, _}           -> ""
+    end.
diff --git a/deps/rabbit/src/rabbit_recovery_terms.erl b/deps/rabbit/src/rabbit_recovery_terms.erl
new file mode 100644
index 0000000000..d89de9ece3
--- /dev/null
+++ b/deps/rabbit/src/rabbit_recovery_terms.erl
@@ -0,0 +1,240 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% We use a gen_server simply so that during the terminate/2 call
+%% (i.e., during shutdown), we can sync/flush the dets table to disk.
+
+-module(rabbit_recovery_terms).
+
+-behaviour(gen_server).
+
+-export([start/1, stop/1, store/3, read/2, clear/1]).
+
+-export([start_link/1]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-export([upgrade_recovery_terms/0, persistent_bytes/0]).
+-export([open_global_table/0, close_global_table/0,
+ read_global/1, delete_global_table/0]).
+-export([open_table/1, close_table/1]).
+
+-rabbit_upgrade({upgrade_recovery_terms, local, []}).
+-rabbit_upgrade({persistent_bytes, local, [upgrade_recovery_terms]}).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start(rabbit_types:vhost()) -> rabbit_types:ok_or_error(term()).
+
+%% Start the per-vhost recovery terms worker under the vhost's
+%% supervisor. Always returns ok: if the vhost disappeared concurrently
+%% the failure is only logged.
+start(VHost) ->
+    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+        {ok, VHostSup} ->
+            {ok, _} = supervisor2:start_child(
+                        VHostSup,
+                        {?MODULE,
+                         {?MODULE, start_link, [VHost]},
+                         transient, ?WORKER_WAIT, worker,
+                         [?MODULE]});
+        %% we can get here if a vhost is added and removed concurrently
+        %% e.g. some integration tests do it
+        {error, {no_such_vhost, VHost}} ->
+            rabbit_log:error("Failed to start a recovery terms manager for vhost ~s: vhost no longer exists!",
+                             [VHost])
+    end,
+    ok.
+
+-spec stop(rabbit_types:vhost()) -> rabbit_types:ok_or_error(term()).
+
+%% Terminate and delete the per-vhost recovery terms worker; tolerates
+%% the vhost having been deleted concurrently (see start/1).
+stop(VHost) ->
+    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+        {ok, VHostSup} ->
+            case supervisor:terminate_child(VHostSup, ?MODULE) of
+                ok -> supervisor:delete_child(VHostSup, ?MODULE);
+                E  -> E
+            end;
+        %% see start/1
+        {error, {no_such_vhost, VHost}} ->
+            rabbit_log:error("Failed to stop a recovery terms manager for vhost ~s: vhost no longer exists!",
+                             [VHost]),
+
+            ok
+    end.
+
+-spec store(rabbit_types:vhost(), file:filename(), term()) -> rabbit_types:ok_or_error(term()).
+
+%% Persist recovery terms for a queue directory into the vhost's DETS
+%% table (the table is named after the vhost, see open_table/1).
+store(VHost, DirBaseName, Terms) ->
+    dets:insert(VHost, {DirBaseName, Terms}).
+
+-spec read(rabbit_types:vhost(), file:filename()) -> rabbit_types:ok_or_error2(term(), not_found).
+
+%% Look up recovery terms for a queue directory in the vhost's table.
+read(VHost, DirBaseName) ->
+    case dets:lookup(VHost, DirBaseName) of
+        [{_, Terms}] -> {ok, Terms};
+        _            -> {error, not_found}
+    end.
+
+-spec clear(rabbit_types:vhost()) -> 'ok'.
+
+%% Delete all recovery terms for a vhost and sync the table. A missing
+%% table (vhost removed concurrently) is logged and ignored.
+clear(VHost) ->
+    try
+        dets:delete_all_objects(VHost)
+    %% see start/1
+    catch _:badarg ->
+            rabbit_log:error("Failed to clear recovery terms for vhost ~s: table no longer exists!",
+                             [VHost]),
+            ok
+    end,
+    flush(VHost).
+
+%% Worker entry point; the gen_server exists mainly so terminate/2 can
+%% flush the DETS table on shutdown.
+start_link(VHost) ->
+    gen_server:start_link(?MODULE, [VHost], []).
+
+%%----------------------------------------------------------------------------
+
+%% Upgrade step: migrate legacy per-queue "clean.dot" recovery files
+%% into the single global recovery.dets table, deleting each file once
+%% migrated. Unreadable files are skipped.
+upgrade_recovery_terms() ->
+    open_global_table(),
+    try
+        QueuesDir = filename:join(rabbit_mnesia:dir(), "queues"),
+        Dirs = case rabbit_file:list_dir(QueuesDir) of
+                   {ok, Entries} -> Entries;
+                   {error, _}    -> []
+               end,
+        [begin
+             File = filename:join([QueuesDir, Dir, "clean.dot"]),
+             case rabbit_file:read_term_file(File) of
+                 {ok, Terms} -> ok  = store_global_table(Dir, Terms);
+                 {error, _}  -> ok
+             end,
+             file:delete(File)
+         end || Dir <- Dirs],
+        ok
+    after
+        close_global_table()
+    end.
+
+%% Upgrade step: add a default persistent_bytes counter to every stored
+%% recovery terms entry.
+persistent_bytes()      -> dets_upgrade(fun persistent_bytes/1).
+persistent_bytes(Props) -> Props ++ [{persistent_bytes, 0}].
+
+%% Apply Fun to every entry of the global table, rewriting each entry
+%% in place; the table is opened and closed around the traversal.
+dets_upgrade(Fun)->
+    open_global_table(),
+    try
+        ok = dets:foldl(fun ({DirBaseName, Terms}, Acc) ->
+                                store_global_table(DirBaseName, Fun(Terms)),
+                                Acc
+                        end, ok, ?MODULE),
+        ok
+    after
+        close_global_table()
+    end.
+
+%% Open the node-global recovery.dets table (kept in RAM, synced
+%% explicitly rather than auto-saved).
+open_global_table() ->
+    File = filename:join(rabbit_mnesia:dir(), "recovery.dets"),
+    {ok, _} = dets:open_file(?MODULE, [{file,      File},
+                                       {ram_file,  true},
+                                       {auto_save, infinity}]),
+    ok.
+
+%% Sync the global recovery terms table to disk and close it. The table
+%% may already be gone (see clear/1), in which case dets raises badarg;
+%% that is logged and ignored so shutdown can proceed.
+close_global_table() ->
+    try
+        dets:sync(?MODULE),
+        dets:close(?MODULE)
+    %% see clear/1
+    catch _:badarg ->
+            rabbit_log:error("Failed to close global recovery terms table: table no longer exists!",
+                             []),
+            ok
+    end.
+
+%% Insert one entry into the node-global table (used by upgrade steps).
+store_global_table(DirBaseName, Terms) ->
+    dets:insert(?MODULE, {DirBaseName, Terms}).
+
+%% Read one entry from the node-global table.
+read_global(DirBaseName) ->
+    case dets:lookup(?MODULE, DirBaseName) of
+        [{_, Terms}] -> {ok, Terms};
+        _            -> {error, not_found}
+    end.
+
+%% Remove the node-global recovery.dets file entirely.
+delete_global_table() ->
+    file:delete(filename:join(rabbit_mnesia:dir(), "recovery.dets")).
+
+%%----------------------------------------------------------------------------
+
+%% gen_server state is simply the vhost name; trap exits so terminate/2
+%% runs and can close (and thereby sync) the table on shutdown.
+init([VHost]) ->
+    process_flag(trap_exit, true),
+    open_table(VHost),
+    {ok, VHost}.
+
+%% No calls or casts are expected; any received one is a bug.
+handle_call(Msg, _, State) -> {stop, {unexpected_call, Msg}, State}.
+
+handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}.
+
+handle_info(_Info, State) -> {noreply, State}.
+
+%% Flush and close the vhost's table so recovery terms survive restart.
+terminate(_Reason, VHost) ->
+    close_table(VHost).
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+
+-spec open_table(vhost:name()) -> rabbit_types:ok_or_error(any()).
+
+%% Open the vhost's recovery.dets table (named after the vhost), with up
+%% to 10 retries.
+open_table(VHost) ->
+    open_table(VHost, 10).
+
+-spec open_table(vhost:name(), non_neg_integer()) -> rabbit_types:ok_or_error(any()).
+
+%% Open (or create) the table under the vhost's message store
+%% directory. On failure the presumably-corrupt file is deleted and the
+%% open retried after a delay, up to RetriesLeft times.
+open_table(VHost, RetriesLeft) ->
+    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+    File = filename:join(VHostDir, "recovery.dets"),
+    Opts = [{file,      File},
+            {ram_file,  true},
+            {auto_save, infinity}],
+    case dets:open_file(VHost, Opts) of
+        {ok, _}        -> ok;
+        {error, Error} ->
+            case RetriesLeft of
+                0 ->
+                    {error, Error};
+                N when is_integer(N) ->
+                    _ = file:delete(File),
+                    %% Wait before retrying
+                    DelayInMs = 1000,
+                    rabbit_log:warning("Failed to open a recovery terms DETS file at ~p. Will delete it and retry in ~p ms (~p retries left)",
+                                       [File, DelayInMs, RetriesLeft]),
+                    timer:sleep(DelayInMs),
+                    open_table(VHost, RetriesLeft - 1)
+            end
+    end.
+
+-spec flush(vhost:name()) -> rabbit_types:ok_or_error(any()).
+
+%% Sync the vhost's table to disk; a missing table (see clear/1) is
+%% logged and ignored.
+flush(VHost) ->
+    try
+        dets:sync(VHost)
+    %% see clear/1
+    catch _:badarg ->
+            rabbit_log:error("Failed to sync recovery terms table for vhost ~s: the table no longer exists!",
+                             [VHost]),
+            ok
+    end.
+
+-spec close_table(vhost:name()) -> rabbit_types:ok_or_error(any()).
+
+%% Flush then close the vhost's table; a missing table (see clear/1) is
+%% logged and ignored.
+close_table(VHost) ->
+    try
+        ok = flush(VHost),
+        ok = dets:close(VHost)
+    %% see clear/1
+    catch _:badarg ->
+            rabbit_log:error("Failed to close recovery terms table for vhost ~s: the table no longer exists!",
+                             [VHost]),
+            ok
+    end.
diff --git a/deps/rabbit/src/rabbit_restartable_sup.erl b/deps/rabbit/src/rabbit_restartable_sup.erl
new file mode 100644
index 0000000000..46fcace99f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_restartable_sup.erl
@@ -0,0 +1,33 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_restartable_sup).
+
+-behaviour(supervisor2).
+
+-export([start_link/3]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+-define(DELAY, 2).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(atom(), rabbit_types:mfargs(), boolean()) ->
+          rabbit_types:ok_pid_or_error().
+
+%% Start a locally-registered supervisor wrapping a single transient
+%% worker started via the given MFA. Delay selects supervisor2's
+%% delayed-restart mode ({transient, 1}) for the child.
+start_link(Name, {_M, _F, _A} = Fun, Delay) ->
+    supervisor2:start_link({local, Name}, ?MODULE, [Fun, Delay]).
+
+%% one_for_one with at most 10 restarts in 10 seconds; the child is
+%% named after (and assumed to live in) the MFA's module.
+init([{Mod, _F, _A} = Fun, Delay]) ->
+    {ok, {{one_for_one, 10, 10},
+          [{Mod, Fun, case Delay of
+                          true  -> {transient, 1};
+                          false -> transient
+                      end, ?WORKER_WAIT, worker, [Mod]}]}}.
diff --git a/deps/rabbit/src/rabbit_router.erl b/deps/rabbit/src/rabbit_router.erl
new file mode 100644
index 0000000000..ed170bcd8e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_router.erl
@@ -0,0 +1,65 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_router).
+-include_lib("stdlib/include/qlc.hrl").
+-include("rabbit.hrl").
+
+-export([match_bindings/2, match_routing_key/2]).
+
+%%----------------------------------------------------------------------------
+
+-export_type([routing_key/0, match_result/0]).
+
+-type routing_key() :: binary().
+-type match_result() :: [rabbit_types:binding_destination()].
+
+-spec match_bindings(rabbit_types:binding_source(),
+ fun ((rabbit_types:binding()) -> boolean())) ->
+ match_result().
+-spec match_routing_key(rabbit_types:binding_source(),
+ [routing_key()] | ['_']) ->
+ match_result().
+
+%%----------------------------------------------------------------------------
+
+%% Return the destinations of all bindings from SrcName whose #binding{}
+%% satisfies the Match predicate. Selects candidate routes from the
+%% rabbit_route ETS table, then filters in Erlang.
+match_bindings(SrcName, Match) ->
+    MatchHead = #route{binding = #binding{source      = SrcName,
+                                          _           = '_'}},
+    Routes = ets:select(rabbit_route, [{MatchHead, [], [['$_']]}]),
+    [Dest || [#route{binding = Binding = #binding{destination = Dest}}] <-
+                 Routes, Match(Binding)].
+
+%% Return the destinations of bindings from SrcName whose key equals
+%% any of the given routing keys ('_' matches all). The single-key case
+%% embeds the key directly in the match head; the multi-key case uses
+%% an 'orelse' match-spec condition over '$2'.
+match_routing_key(SrcName, [RoutingKey]) ->
+    find_routes(#route{binding = #binding{source      = SrcName,
+                                          destination = '$1',
+                                          key         = RoutingKey,
+                                          _           = '_'}},
+                []);
+match_routing_key(SrcName, [_|_] = RoutingKeys) ->
+    find_routes(#route{binding = #binding{source      = SrcName,
+                                          destination = '$1',
+                                          key         = '$2',
+                                          _           = '_'}},
+                [list_to_tuple(['orelse' | [{'=:=', '$2', RKey} ||
+                                               RKey <- RoutingKeys]])]).
+
+%%--------------------------------------------------------------------
+
+%% Normally we'd call mnesia:dirty_select/2 here, but that is quite
+%% expensive for the same reasons as above, and, additionally, due to
+%% mnesia 'fixing' the table with ets:safe_fixtable/2, which is wholly
+%% unnecessary. According to the ets docs (and the code in erl_db.c),
+%% 'select' is safe anyway ("Functions that internally traverse over a
+%% table, like select and match, will give the same guarantee as
+%% safe_fixtable.") and, furthermore, even the lower level iterators
+%% ('first' and 'next') are safe on ordered_set tables ("Note that for
+%% tables of the ordered_set type, safe_fixtable/2 is not necessary as
+%% calls to first/1 and next/2 will always succeed."), which
+%% rabbit_route is.
+%% Run a match spec against rabbit_route, returning the bound '$1'
+%% (destination) of each matching route.
+find_routes(MatchHead, Conditions) ->
+    ets:select(rabbit_route, [{MatchHead, Conditions, ['$1']}]).
diff --git a/deps/rabbit/src/rabbit_runtime_parameters.erl b/deps/rabbit/src/rabbit_runtime_parameters.erl
new file mode 100644
index 0000000000..1870b5dfa5
--- /dev/null
+++ b/deps/rabbit/src/rabbit_runtime_parameters.erl
@@ -0,0 +1,412 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_runtime_parameters).
+
+%% Runtime parameters are bits of configuration that are
+%% set, as the name implies, at runtime and not in the config file.
+%%
+%% The benefits of storing some bits of configuration at runtime vary:
+%%
+%% * Some parameters are vhost-specific
+%% * Others are specific to individual nodes
+%% * ...or even queues, exchanges, etc
+%%
+%% The most obvious use case for runtime parameters is policies but
+%% there are others:
+%%
+%% * Plugin-specific parameters that only make sense at runtime,
+%% e.g. Federation and Shovel link settings
+%% * Exchange and queue decorators
+%%
+%% Parameters are grouped by components, e.g. <<"policy">> or <<"shovel">>.
+%% Components are mapped to modules that perform validation.
+%% Runtime parameter values are then looked up by the modules that
+%% need to use them.
+%%
+%% Parameters are stored in Mnesia and can be global. Their changes
+%% are broadcast over rabbit_event.
+%%
+%% Global parameters keys are atoms and values are JSON documents.
+%%
+%% See also:
+%%
+%% * rabbit_policies
+%% * rabbit_policy
+%% * rabbit_registry
+%% * rabbit_event
+
+-include("rabbit.hrl").
+
+-export([parse_set/5, set/5, set_any/5, clear/4, clear_any/4, list/0, list/1,
+ list_component/1, list/2, list_formatted/1, list_formatted/3,
+ lookup/3, value/3, value/4, info_keys/0, clear_component/2]).
+
+-export([parse_set_global/3, set_global/3, value_global/1, value_global/2,
+ list_global/0, list_global_formatted/0, list_global_formatted/2,
+ lookup_global/1, global_info_keys/0, clear_global/2]).
+
+%%----------------------------------------------------------------------------
+
+-type ok_or_error_string() :: 'ok' | {'error_string', string()}.
+-type ok_thunk_or_error_string() :: ok_or_error_string() | fun(() -> 'ok').
+
+-spec parse_set(rabbit_types:vhost(), binary(), binary(), string(),
+ rabbit_types:user() | rabbit_types:username() | 'none')
+ -> ok_or_error_string().
+-spec set(rabbit_types:vhost(), binary(), binary(), term(),
+ rabbit_types:user() | rabbit_types:username() | 'none')
+ -> ok_or_error_string().
+-spec set_any(rabbit_types:vhost(), binary(), binary(), term(),
+ rabbit_types:user() | rabbit_types:username() | 'none')
+ -> ok_or_error_string().
+-spec set_global(atom(), term(), rabbit_types:username()) -> 'ok'.
+-spec clear(rabbit_types:vhost(), binary(), binary(), rabbit_types:username())
+ -> ok_thunk_or_error_string().
+-spec clear_any(rabbit_types:vhost(), binary(), binary(), rabbit_types:username())
+ -> ok_thunk_or_error_string().
+-spec list() -> [rabbit_types:infos()].
+-spec list(rabbit_types:vhost() | '_') -> [rabbit_types:infos()].
+-spec list_component(binary()) -> [rabbit_types:infos()].
+-spec list(rabbit_types:vhost() | '_', binary() | '_')
+ -> [rabbit_types:infos()].
+-spec list_formatted(rabbit_types:vhost()) -> [rabbit_types:infos()].
+-spec list_formatted(rabbit_types:vhost(), reference(), pid()) -> 'ok'.
+-spec lookup(rabbit_types:vhost(), binary(), binary())
+ -> rabbit_types:infos() | 'not_found'.
+-spec value(rabbit_types:vhost(), binary(), binary()) -> term().
+-spec value(rabbit_types:vhost(), binary(), binary(), term()) -> term().
+-spec value_global(atom()) -> term() | 'not_found'.
+-spec value_global(atom(), term()) -> term().
+-spec info_keys() -> rabbit_types:info_keys().
+
+%%---------------------------------------------------------------------------
+
+-import(rabbit_misc, [pget/2]).
+
+-define(TABLE, rabbit_runtime_parameters).
+
+%%---------------------------------------------------------------------------
+
+%% Decode String as JSON and store it as the runtime parameter
+%% {VHost, Component, Name}. JSON objects are converted to proplists
+%% before storage. The <<"policy">> component is reserved and must be
+%% managed through rabbit_policy, so it is rejected up front.
+parse_set(_, <<"policy">>, _, _, _) ->
+ {error_string, "policies may not be set using this method"};
+parse_set(VHost, Component, Name, String, User) ->
+ case rabbit_json:try_decode(rabbit_data_coercion:to_binary(String)) of
+ {ok, Map} when is_map(Map) ->
+ set(VHost, Component, Name, maps:to_list(Map), User);
+ {ok, Decoded} ->
+ set(VHost, Component, Name, Decoded, User);
+ {error, Reason} ->
+ Msg = rabbit_misc:format("JSON decoding error. Reason: ~ts", [Reason]),
+ {error_string, Msg}
+ end.
+
+%% Store an already-decoded term as a vhost-scoped runtime parameter.
+%% The reserved <<"policy">> component must go through rabbit_policy.
+set(VHost, Component, Name, Term, User) when Component =/= <<"policy">> ->
+ set_any(VHost, Component, Name, Term, User);
+set(_VHost, _Component, _Name, _Term, _User) ->
+ {error_string, "policies may not be set using this method"}.
+
+%% Decode String as JSON and store it as the global (non-vhost) runtime
+%% parameter Name; JSON objects become proplists, as in parse_set/5.
+parse_set_global(Name, String, ActingUser) ->
+ Definition = rabbit_data_coercion:to_binary(String),
+ case rabbit_json:try_decode(Definition) of
+ {ok, Term} when is_map(Term) -> set_global(Name, maps:to_list(Term), ActingUser);
+ {ok, Term} -> set_global(Name, Term, ActingUser);
+ {error, Reason} ->
+ {error_string,
+ rabbit_misc:format("JSON decoding error. Reason: ~ts", [Reason])}
+ end.
+
+%% Store a global runtime parameter under an atom key and broadcast a
+%% parameter_set event over rabbit_event. NOTE(review): Name is coerced
+%% with to_atom/1, so callers must not feed untrusted input here (atoms
+%% are never garbage-collected).
+set_global(Name, Term, ActingUser) ->
+ NameAsAtom = rabbit_data_coercion:to_atom(Name),
+ rabbit_log:debug("Setting global parameter '~s' to ~p", [NameAsAtom, Term]),
+ mnesia_update(NameAsAtom, Term),
+ event_notify(parameter_set, none, global, [{name, NameAsAtom},
+ {value, Term},
+ {user_who_performed_action, ActingUser}]),
+ ok.
+
+%% Wrap a list of validation errors into an {error_string, Text} tuple.
+format_error(Errors) ->
+ Text = rabbit_misc:format_many([{"Validation failed~n", []} | Errors]),
+ {error_string, Text}.
+
+%% Set any runtime parameter (no component restrictions), converting
+%% validation failures into a human-readable error string.
+set_any(VHost, Component, Name, Term, User) ->
+ case set_any0(VHost, Component, Name, Term, User) of
+ {errors, Errors} -> format_error(Errors);
+ ok -> ok
+ end.
+
+%% Validate Term with the component's registered module, write it to
+%% mnesia, and (only when the value actually changed) emit a
+%% parameter_set event and call the component's notify/5 callback.
+%% Returns ok or {errors, [{Fmt, Args}]}.
+set_any0(VHost, Component, Name, Term, User) ->
+ rabbit_log:debug("Asked to set or update runtime parameter '~s' in vhost '~s' "
+ "for component '~s', value: ~p",
+ [Name, VHost, Component, Term]),
+ case lookup_component(Component) of
+ {ok, Mod} ->
+ case flatten_errors(
+ Mod:validate(VHost, Component, Name, Term, get_user(User))) of
+ ok ->
+ case mnesia_update(VHost, Component, Name, Term) of
+ %% Term is already bound: this matches only when
+ %% the stored value is unchanged, so skip notify.
+ {old, Term} ->
+ ok;
+ _ ->
+ ActingUser = get_username(User),
+ event_notify(
+ parameter_set, VHost, Component,
+ [{name, Name},
+ {value, Term},
+ {user_who_performed_action, ActingUser}]),
+ Mod:notify(VHost, Component, Name, Term, ActingUser)
+ end,
+ ok;
+ E ->
+ E
+ end;
+ E ->
+ E
+ end.
+
+%% Pass a full #user{} record through to validators; anything else
+%% (bare username or 'none', as used before #rabbitmq-event-exchange-10)
+%% is mapped to 'none'.
+get_user(#user{} = User) ->
+ User;
+get_user(_) ->
+ none.
+
+%% Resolve the acting username for event auditing: a #user{} record,
+%% the internal-user placeholder for 'none', or the value as-is.
+get_username(#user{username = Username}) ->
+ Username;
+get_username(none) ->
+ ?INTERNAL_USER;
+get_username(Any) ->
+ Any.
+
+%% Write a global parameter (atom Key) in an mnesia transaction.
+mnesia_update(Key, Term) ->
+ rabbit_misc:execute_mnesia_transaction(mnesia_update_fun(Key, Term)).
+
+%% Write a vhost-scoped parameter; rabbit_vhost:with/2 makes the
+%% transaction fail if the vhost no longer exists.
+mnesia_update(VHost, Comp, Name, Term) ->
+ rabbit_misc:execute_mnesia_transaction(
+ rabbit_vhost:with(VHost, mnesia_update_fun({VHost, Comp, Name}, Term))).
+
+%% Transaction body: upsert Key and report whether it was 'new' or
+%% {old, PreviousValue} so callers can suppress no-op notifications.
+mnesia_update_fun(Key, Term) ->
+ fun () ->
+ Res = case mnesia:read(?TABLE, Key, read) of
+ [] -> new;
+ [Params] -> {old, Params#runtime_parameters.value}
+ end,
+ ok = mnesia:write(?TABLE, c(Key, Term), write),
+ Res
+ end.
+
+%% Clear a vhost-scoped runtime parameter. The reserved <<"policy">>
+%% component must be cleared through rabbit_policy instead.
+clear(VHost, Component, Name, ActingUser) when Component =/= <<"policy">> ->
+ clear_any(VHost, Component, Name, ActingUser);
+clear(_VHost, _Component, _Name, _ActingUser) ->
+ {error_string, "policies may not be cleared using this method"}.
+
+%% Delete a global runtime parameter and broadcast an event. Returns
+%% an error string when the parameter does not exist. When the caller
+%% is already inside an mnesia transaction, the notification fun is
+%% returned so it can be run after the transaction commits.
+clear_global(Key, ActingUser) ->
+ KeyAsAtom = rabbit_data_coercion:to_atom(Key),
+ Notify = fun() ->
+ %% Announce a *clear*, mirroring clear_any/4. The
+ %% previous 'parameter_set' event name here misled
+ %% rabbit_event consumers into seeing an update.
+ event_notify(parameter_cleared, none, global,
+ [{name, KeyAsAtom},
+ {user_who_performed_action, ActingUser}]),
+ ok
+ end,
+ case value_global(KeyAsAtom) of
+ not_found ->
+ {error_string, "Parameter does not exist"};
+ _ ->
+ F = fun () ->
+ ok = mnesia:delete(?TABLE, KeyAsAtom, write)
+ end,
+ ok = rabbit_misc:execute_mnesia_transaction(F),
+ case mnesia:is_transaction() of
+ true -> Notify;
+ false -> Notify()
+ end
+ end.
+
+%% Clear every runtime parameter registered under Component, across
+%% all vhosts. Per-parameter clear results are ignored; always ok.
+clear_component(Component, ActingUser) ->
+ lists:foreach(
+ fun(Param) ->
+ clear(pget(vhost, Param),
+ pget(component, Param),
+ pget(name, Param),
+ ActingUser)
+ end, list_component(Component)),
+ ok.
+
+%% Delete the parameter, emit a parameter_cleared event and call the
+%% component's notify_clear/4 callback. When already inside an mnesia
+%% transaction the notification fun is returned for the caller to run
+%% after commit, instead of being executed immediately.
+clear_any(VHost, Component, Name, ActingUser) ->
+ Notify = fun () ->
+ case lookup_component(Component) of
+ {ok, Mod} -> event_notify(
+ parameter_cleared, VHost, Component,
+ [{name, Name},
+ {user_who_performed_action, ActingUser}]),
+ Mod:notify_clear(VHost, Component, Name, ActingUser);
+ _ -> ok
+ end
+ end,
+ case lookup(VHost, Component, Name) of
+ not_found -> {error_string, "Parameter does not exist"};
+ _ -> mnesia_clear(VHost, Component, Name),
+ case mnesia:is_transaction() of
+ true -> Notify;
+ false -> Notify()
+ end
+ end.
+
+%% Transactionally delete one vhost-scoped parameter; rabbit_vhost:with/2
+%% aborts the transaction if the vhost is gone.
+mnesia_clear(VHost, Component, Name) ->
+ F = fun () ->
+ ok = mnesia:delete(?TABLE, {VHost, Component, Name}, write)
+ end,
+ ok = rabbit_misc:execute_mnesia_transaction(rabbit_vhost:with(VHost, F)).
+
+%% Broadcast a runtime-parameter event over rabbit_event. Policy
+%% events are suppressed here (rabbit_policy emits its own); global
+%% (vhost = none) events omit the vhost field. Clause order matters.
+event_notify(_Event, _VHost, <<"policy">>, _Props) ->
+ ok;
+event_notify(Event, none, Component, Props) ->
+ rabbit_event:notify(Event, [{component, Component} | Props]);
+event_notify(Event, VHost, Component, Props) ->
+ rabbit_event:notify(Event, [{vhost, VHost},
+ {component, Component} | Props]).
+
+%% List all vhost-scoped parameters (as proplists), excluding policies.
+list() ->
+ [p(P) || #runtime_parameters{ key = {_VHost, Comp, _Name}} = P <-
+ rabbit_misc:dirty_read_all(?TABLE), Comp /= <<"policy">>].
+
+%% Convenience wrappers over list/2; '_' acts as a wildcard.
+list(VHost) -> list(VHost, '_').
+list_component(Component) -> list('_', Component).
+
+%% Not dirty_match_object since that would not be transactional when used in a
+%% tx context
+%% Policies are filtered out unless they were asked for explicitly.
+list(VHost, Component) ->
+ mnesia:async_dirty(
+ fun () ->
+ case VHost of
+ '_' -> ok;
+ _ -> rabbit_vhost:assert(VHost)
+ end,
+ Match = #runtime_parameters{key = {VHost, Component, '_'},
+ _ = '_'},
+ [p(P) || #runtime_parameters{key = {_VHost, Comp, _Name}} = P <-
+ mnesia:match_object(?TABLE, Match, read),
+ Comp =/= <<"policy">> orelse Component =:= <<"policy">>]
+ end).
+
+%% List global parameters: records whose key is an atom rather than a
+%% {VHost, Component, Name} tuple.
+list_global() ->
+ %% list only atom keys
+ mnesia:async_dirty(
+ fun () ->
+ Match = #runtime_parameters{key = '_', _ = '_'},
+ [p(P) || P <- mnesia:match_object(?TABLE, Match, read),
+ is_atom(P#runtime_parameters.key)]
+ end).
+
+%% List a vhost's parameters with values rendered as JSON for display.
+list_formatted(VHost) ->
+ [ format_parameter(info_keys(), P) || P <- list(VHost) ].
+
+%% Project the parameter proplist P onto InfoKeys, preserving key
+%% order. The 'value' key is special-cased: its term is re-encoded as
+%% JSON for display. Keys absent from P are skipped.
+format_parameter(InfoKeys, P) ->
+ lists:filtermap(
+ fun(value) ->
+ {true, {value, rabbit_json:encode(pget(value, P))}};
+ (Key) ->
+ case lists:keyfind(Key, 1, P) of
+ {Key, Val} -> {true, {Key, Val}};
+ false -> false
+ end
+ end, InfoKeys).
+
+%% Streaming variant used by CLI tools: emit each formatted parameter
+%% to AggregatorPid, tagged with Ref.
+list_formatted(VHost, Ref, AggregatorPid) ->
+ rabbit_control_misc:emitting_map(
+ AggregatorPid, Ref,
+ fun(P) -> format_parameter(info_keys(), P) end, list(VHost)).
+
+%% As list_formatted/1 but for global parameters.
+list_global_formatted() ->
+ [ format_parameter(global_info_keys(), P) || P <- list_global() ].
+
+%% Streaming variant of list_global_formatted/0 for CLI tools.
+list_global_formatted(Ref, AggregatorPid) ->
+ rabbit_control_misc:emitting_map(
+ AggregatorPid, Ref,
+ fun(P) -> format_parameter(global_info_keys(), P) end, list_global()).
+
+%% Fetch one vhost-scoped parameter as a proplist, or 'not_found'.
+lookup(VHost, Component, Name) ->
+ case lookup0({VHost, Component, Name}, rabbit_misc:const(not_found)) of
+ not_found -> not_found;
+ Params -> p(Params)
+ end.
+
+%% Fetch one global parameter (atom key) as a proplist, or 'not_found'.
+lookup_global(Name) ->
+ case lookup0(Name, rabbit_misc:const(not_found)) of
+ not_found -> not_found;
+ Params -> p(Params)
+ end.
+
+%% Raw value accessors. The 3-arity form returns 'not_found' on a miss;
+%% the 4-arity form returns Def — and (via lookup_missing/2) *stores*
+%% Def in the table on a miss, i.e. it is a read-through initialiser.
+value(VHost, Comp, Name) -> value0({VHost, Comp, Name}).
+value(VHost, Comp, Name, Def) -> value0({VHost, Comp, Name}, Def).
+
+value_global(Key) ->
+ value0(Key).
+
+value_global(Key, Default) ->
+ value0(Key, Default).
+
+value0(Key) ->
+ case lookup0(Key, rabbit_misc:const(not_found)) of
+ not_found -> not_found;
+ Params -> Params#runtime_parameters.value
+ end.
+
+value0(Key, Default) ->
+ Params = lookup0(Key, fun () -> lookup_missing(Key, Default) end),
+ Params#runtime_parameters.value.
+
+%% Dirty-read Key from the parameters table, invoking DefaultFun when
+%% no record exists.
+lookup0(Key, DefaultFun) ->
+ case mnesia:dirty_read(?TABLE, Key) of
+ [Record] -> Record;
+ [] -> DefaultFun()
+ end.
+
+%% Transactionally insert Default under Key unless another writer beat
+%% us to it; returns the record now in the table (side effect: a miss
+%% in value0/2 persists its default).
+lookup_missing(Key, Default) ->
+ rabbit_misc:execute_mnesia_transaction(
+ fun () ->
+ case mnesia:read(?TABLE, Key, read) of
+ [] -> Record = c(Key, Default),
+ mnesia:write(?TABLE, Record, write),
+ Record;
+ [R] -> R
+ end
+ end).
+
+%% Construct a #runtime_parameters{} record.
+c(Key, Default) ->
+ #runtime_parameters{key = Key,
+ value = Default}.
+
+%% Render a record as a proplist: vhost-scoped keys expose vhost,
+%% component and name; global (atom) keys expose only name and value.
+p(#runtime_parameters{key = {VHost, Component, Name}, value = Value}) ->
+ [{vhost, VHost},
+ {component, Component},
+ {name, Name},
+ {value, Value}];
+
+p(#runtime_parameters{key = Key, value = Value}) when is_atom(Key) ->
+ [{name, Key},
+ {value, Value}].
+
+%% Keys exposed by list_formatted/* for vhost-scoped parameters.
+info_keys() -> [component, name, value].
+
+%% Keys exposed by list_global_formatted/* for global parameters.
+global_info_keys() -> [name, value].
+
+%%---------------------------------------------------------------------------
+
+%% Resolve a component name (a binary, typically supplied by a user or
+%% CLI) to its registered validating module. Uses
+%% binary_to_existing_atom/2 rather than list_to_atom/1 so arbitrary
+%% unregistered component names cannot grow the atom table (a DoS
+%% vector); any registered component's atom already exists.
+lookup_component(Component) ->
+ try
+ Name = binary_to_existing_atom(Component, utf8),
+ case rabbit_registry:lookup_module(runtime_parameter, Name) of
+ {error, not_found} -> {errors,
+ [{"component ~s not found", [Component]}]};
+ {ok, Module} -> {ok, Module}
+ end
+ catch
+ error:badarg ->
+ %% No such atom exists => no such component was registered.
+ {errors, [{"component ~s not found", [Component]}]}
+ end.
+
+%% Collapse a validator result (ok | {error, Fmt, Args} | arbitrarily
+%% nested lists thereof) into ok or {errors, [{Fmt, Args}]}.
+flatten_errors(Result) ->
+ case [{Fmt, Args} || {error, Fmt, Args} <- lists:flatten([Result])] of
+ [] -> ok;
+ Errors -> {errors, Errors}
+ end.
diff --git a/deps/rabbit/src/rabbit_ssl.erl b/deps/rabbit/src/rabbit_ssl.erl
new file mode 100644
index 0000000000..84670b0a19
--- /dev/null
+++ b/deps/rabbit/src/rabbit_ssl.erl
@@ -0,0 +1,195 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_ssl).
+
+-include_lib("public_key/include/public_key.hrl").
+
+-export([peer_cert_issuer/1, peer_cert_subject/1, peer_cert_validity/1]).
+-export([peer_cert_subject_items/2, peer_cert_auth_name/1]).
+-export([cipher_suites_erlang/2, cipher_suites_erlang/1,
+ cipher_suites_openssl/2, cipher_suites_openssl/1,
+ cipher_suites/1]).
+
+%%--------------------------------------------------------------------------
+
+-export_type([certificate/0]).
+
+% Due to API differences between OTP releases.
+-dialyzer(no_missing_calls).
+-ignore_xref([{ssl_cipher_format, suite_legacy, 1},
+ {ssl_cipher_format, suite, 1},
+ {ssl_cipher_format, suite_to_str, 1},
+ {ssl_cipher_format, erl_suite_definition, 1},
+ {ssl_cipher_format, suite_map_to_openssl_str, 1},
+ {ssl_cipher_format, suite_map_to_bin, 1}]).
+
+-type certificate() :: rabbit_cert_info:certificate().
+
+-type cipher_suites_mode() :: default | all | anonymous.
+
+-spec cipher_suites(cipher_suites_mode()) -> ssl:ciphers().
+%% List cipher suites for Mode at the highest TLS protocol version
+%% this runtime supports.
+cipher_suites(Mode) ->
+ Version = get_highest_protocol_version(),
+ ssl:cipher_suites(Mode, Version).
+
+-spec cipher_suites_erlang(cipher_suites_mode()) ->
+ [ssl:old_cipher_suite()].
+%% As cipher_suites_erlang/2, defaulting to the highest supported
+%% TLS protocol version.
+cipher_suites_erlang(Mode) ->
+ Version = get_highest_protocol_version(),
+ cipher_suites_erlang(Mode, Version).
+
+-spec cipher_suites_erlang(cipher_suites_mode(),
+ ssl:protocol_version() | tls_record:tls_version()) ->
+ [ssl:old_cipher_suite()].
+%% List cipher suites for Mode/Version in the legacy Erlang tuple
+%% format, converting each suite map with format_cipher_erlang/1.
+cipher_suites_erlang(Mode, Version) ->
+ lists:map(fun format_cipher_erlang/1, ssl:cipher_suites(Mode, Version)).
+
+-spec cipher_suites_openssl(cipher_suites_mode()) ->
+ [ssl:old_cipher_suite()].
+%% As cipher_suites_openssl/2, defaulting to the highest supported
+%% TLS protocol version.
+cipher_suites_openssl(Mode) ->
+ Version = get_highest_protocol_version(),
+ cipher_suites_openssl(Mode, Version).
+
+-spec cipher_suites_openssl(cipher_suites_mode(),
+ ssl:protocol_version() | tls_record:tls_version()) ->
+ [ssl:old_cipher_suite()].
+%% List cipher suites for Mode/Version as OpenSSL-style name strings.
+%% Suites for which format_cipher_openssl/1 yields a non-list (no
+%% OpenSSL name) are dropped from the result.
+cipher_suites_openssl(Mode, Version) ->
+ Suites = ssl:cipher_suites(Mode, Version),
+ Names = [format_cipher_openssl(C) || C <- Suites],
+ [Name || Name <- Names, is_list(Name)].
+
+
+%% Convert one suite map to the legacy Erlang tuple format, dispatching
+%% on whether the OTP 22+ ssl_cipher_format API is present. NOTE:
+%% function_exported/3 returns false for unloaded modules — presumably
+%% ssl is already loaded by the time this runs; verify against callers.
+format_cipher_erlang(Cipher) ->
+ case erlang:function_exported(ssl_cipher_format, suite_map_to_bin, 1) of
+ true ->
+ format_cipher_erlang22(Cipher);
+ false ->
+ format_cipher_erlang21(Cipher)
+ end.
+
+%% OTP 22+ conversion path.
+format_cipher_erlang22(Cipher) ->
+ ssl_cipher_format:suite_legacy(ssl_cipher_format:suite_map_to_bin(Cipher)).
+
+%% OTP 21 conversion path.
+format_cipher_erlang21(Cipher) ->
+ ssl_cipher_format:erl_suite_definition(ssl_cipher_format:suite(Cipher)).
+
+
+%% Convert one suite map to its OpenSSL-style name, dispatching on
+%% whether the OTP 22+ ssl_cipher_format API is available.
+format_cipher_openssl(Cipher) ->
+ HasModernApi =
+ erlang:function_exported(ssl_cipher_format, suite_map_to_bin, 1),
+ case HasModernApi of
+ true -> format_cipher_openssl22(Cipher);
+ false -> format_cipher_openssl21(Cipher)
+ end.
+
+%% OTP 22+ OpenSSL-name conversion path.
+format_cipher_openssl22(Cipher) ->
+ ssl_cipher_format:suite_map_to_openssl_str(Cipher).
+
+%% OTP 21 OpenSSL-name conversion path.
+format_cipher_openssl21(Cipher) ->
+ ssl_cipher_format:suite_to_str(Cipher).
+
+-spec get_highest_protocol_version() -> tls_record:tls_atom_version().
+%% Highest TLS version supported by this runtime, as an atom
+%% (e.g. 'tlsv1.3'), derived from tls_record with no restrictions.
+get_highest_protocol_version() ->
+ tls_record:protocol_version(
+ tls_record:highest_protocol_version([])).
+
+%%--------------------------------------------------------------------------
+%% High-level functions used by reader
+%%--------------------------------------------------------------------------
+
+%% Return a string describing the certificate's issuer.
+%% Thin delegation to rabbit_cert_info.
+peer_cert_issuer(Cert) ->
+ rabbit_cert_info:issuer(Cert).
+
+%% Return a string describing the certificate's subject, as per RFC4514.
+peer_cert_subject(Cert) ->
+ rabbit_cert_info:subject(Cert).
+
+%% Return the parts of the certificate's subject matching the given
+%% attribute type (e.g. ?'id-at-commonName').
+peer_cert_subject_items(Cert, Type) ->
+ rabbit_cert_info:subject_items(Cert, Type).
+
+%% Return only the certificate's subject-alternative-name entries of
+%% the given (OTP) SAN type, e.g. dNSName.
+peer_cert_subject_alternative_names(Cert, Type) ->
+ [SAN || {SANType, _} = SAN <- rabbit_cert_info:subject_alternative_names(Cert),
+ SANType =:= Type].
+
+%% Return a string describing the certificate's validity.
+peer_cert_validity(Cert) ->
+ rabbit_cert_info:validity(Cert).
+
+%% Extract a username from the certificate
+-spec peer_cert_auth_name
+ (certificate()) -> binary() | 'not_found' | 'unsafe'.
+
+%% The extraction mode is read from the 'ssl_cert_login_from' app env;
+%% crashes (badmatch) if it is unset.
+peer_cert_auth_name(Cert) ->
+ {ok, Mode} = application:get_env(rabbit, ssl_cert_login_from),
+ peer_cert_auth_name(Mode, Cert).
+
+%% Mode-specific username extraction. Every mode first checks that TLS
+%% peer verification is enabled (auth_config_sane/0) and returns
+%% 'unsafe' otherwise.
+peer_cert_auth_name(distinguished_name, Cert) ->
+ case auth_config_sane() of
+ true -> iolist_to_binary(peer_cert_subject(Cert));
+ false -> unsafe
+ end;
+
+%% 'subject_alt_name' is accepted as an alias of
+%% 'subject_alternative_name'.
+peer_cert_auth_name(subject_alt_name, Cert) ->
+ peer_cert_auth_name(subject_alternative_name, Cert);
+
+%% Pick the N-th SAN of the configured type (ssl_cert_login_san_type,
+%% default dns; ssl_cert_login_san_index, default 0, zero-based).
+peer_cert_auth_name(subject_alternative_name, Cert) ->
+ case auth_config_sane() of
+ true ->
+ Type = application:get_env(rabbit, ssl_cert_login_san_type, dns),
+ %% lists:nth/2 is 1-based
+ Index = application:get_env(rabbit, ssl_cert_login_san_index, 0) + 1,
+ OfType = peer_cert_subject_alternative_names(Cert, otp_san_type(Type)),
+ rabbit_log:debug("Peer certificate SANs of type ~s: ~p, index to use with lists:nth/2: ~b", [Type, OfType, Index]),
+ case length(OfType) of
+ 0 -> not_found;
+ N when N < Index -> not_found;
+ N when N >= Index ->
+ {_, Value} = lists:nth(Index, OfType),
+ rabbit_data_coercion:to_binary(Value)
+ end;
+ false -> unsafe
+ end;
+
+peer_cert_auth_name(common_name, Cert) ->
+ %% If there is more than one CN then we join them with "," in a
+ %% vaguely DN-like way. But this is more just so we do something
+ %% more intelligent than crashing, if you actually want to escape
+ %% things properly etc, use DN mode.
+ case auth_config_sane() of
+ true -> case peer_cert_subject_items(Cert, ?'id-at-commonName') of
+ not_found -> not_found;
+ CNs -> list_to_binary(string:join(CNs, ","))
+ end;
+ false -> unsafe
+ end.
+
+%% True iff TLS peer verification is enabled (ssl_options.verify ==
+%% verify_peer) — without it, certificate-based login would trust an
+%% unverified certificate. Logs a warning when disabled.
+auth_config_sane() ->
+ {ok, Opts} = application:get_env(rabbit, ssl_options),
+ case proplists:get_value(verify, Opts) of
+ verify_peer -> true;
+ V -> rabbit_log:warning("TLS peer verification (authentication) is "
+ "disabled, ssl_options.verify value used: ~p. "
+ "See https://www.rabbitmq.com/ssl.html#peer-verification to learn more.", [V]),
+ false
+ end.
+
+%% Map a config-friendly SAN type (dns, ip, ...) to the corresponding
+%% OTP public_key SAN type atom; unknown types pass through unchanged.
+otp_san_type(Type) ->
+ maps:get(Type, #{dns => dNSName,
+ ip => iPAddress,
+ email => rfc822Name,
+ uri => uniformResourceIdentifier,
+ other_name => otherName},
+ Type).
diff --git a/deps/rabbit/src/rabbit_stream_coordinator.erl b/deps/rabbit/src/rabbit_stream_coordinator.erl
new file mode 100644
index 0000000000..9e4890c894
--- /dev/null
+++ b/deps/rabbit/src/rabbit_stream_coordinator.erl
@@ -0,0 +1,949 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at https://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% Copyright (c) 2012-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+-module(rabbit_stream_coordinator).
+
+-behaviour(ra_machine).
+
+-export([start/0]).
+-export([format_ra_event/2]).
+
+-export([init/1,
+ apply/3,
+ state_enter/2,
+ init_aux/1,
+ handle_aux/6,
+ tick/2]).
+
+-export([recover/0,
+ start_cluster/1,
+ delete_cluster/2,
+ add_replica/2,
+ delete_replica/2]).
+
+-export([policy_changed/1]).
+
+-export([phase_repair_mnesia/2,
+ phase_start_cluster/1,
+ phase_delete_cluster/2,
+ phase_check_quorum/1,
+ phase_start_new_leader/1,
+ phase_stop_replicas/1,
+ phase_start_replica/3,
+ phase_delete_replica/2]).
+
+-export([log_overview/1]).
+
+-define(STREAM_COORDINATOR_STARTUP, {stream_coordinator_startup, self()}).
+-define(TICK_TIMEOUT, 60000).
+-define(RESTART_TIMEOUT, 1000).
+-define(PHASE_RETRY_TIMEOUT, 10000).
+-define(CMD_TIMEOUT, 30000).
+
+-record(?MODULE, {streams, monitors}).
+
+%% Start (or restart) this node's Ra stream-coordinator member and
+%% join it to an existing coordinator cluster if one is found on any
+%% cluster node. Exits on unrecoverable ra errors.
+start() ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ ServerId = {?MODULE, node()},
+ case ra:restart_server(ServerId) of
+ {error, Reason} when Reason == not_started orelse
+ Reason == name_not_registered ->
+ case ra:start_server(make_ra_conf(node(), Nodes)) of
+ ok ->
+ %% Serialise concurrent startups cluster-wide so only
+ %% one node performs the election/membership change.
+ global:set_lock(?STREAM_COORDINATOR_STARTUP),
+ case find_members(Nodes) of
+ [] ->
+ %% We're the first (and maybe only) one
+ ra:trigger_election(ServerId);
+ Members ->
+ %% What to do if we get a timeout?
+ {ok, _, _} = ra:add_member(Members, ServerId, 30000)
+ end,
+ global:del_lock(?STREAM_COORDINATOR_STARTUP),
+ %% Block until a leader is known before returning.
+ _ = ra:members(ServerId),
+ ok;
+ Error ->
+ exit(Error)
+ end;
+ ok ->
+ ok;
+ Error ->
+ exit(Error)
+ end.
+
+%% Probe each node for an existing coordinator and return the first
+%% membership list found, or [] when none responds. Timeouts are
+%% treated like 'no coordinator on that node' and the search continues.
+find_members([]) ->
+ [];
+find_members([Node | Nodes]) ->
+ case ra:members({?MODULE, Node}) of
+ {_, Members, _} ->
+ Members;
+ {error, noproc} ->
+ find_members(Nodes);
+ {timeout, _} ->
+ %% not sure what to do here
+ find_members(Nodes)
+ end.
+
+%% Restart this node's coordinator member after a node restart.
+recover() ->
+ ra:restart_server({?MODULE, node()}).
+
+%% Public command API: each call is turned into a Ra command processed
+%% by the coordinator state machine.
+start_cluster(Q) ->
+ process_command({start_cluster, #{queue => Q}}).
+
+delete_cluster(StreamId, ActingUser) ->
+ process_command({delete_cluster, #{stream_id => StreamId, acting_user => ActingUser}}).
+
+add_replica(StreamId, Node) ->
+ process_command({start_replica, #{stream_id => StreamId, node => Node,
+ retries => 1}}).
+
+policy_changed(StreamId) ->
+ process_command({policy_changed, #{stream_id => StreamId}}).
+
+delete_replica(StreamId, Node) ->
+ process_command({delete_replica, #{stream_id => StreamId, node => Node}}).
+
+%% Ensure the coordinator exists (starting it under a global lock if
+%% necessary), then submit Cmd to one of its servers.
+process_command(Cmd) ->
+ global:set_lock(?STREAM_COORDINATOR_STARTUP),
+ Servers = ensure_coordinator_started(),
+ global:del_lock(?STREAM_COORDINATOR_STARTUP),
+ process_command(Servers, Cmd).
+
+%% Try each candidate server in turn; fall through on timeout or
+%% noproc, and give up with an error once all servers are exhausted.
+process_command([], _Cmd) ->
+ {error, coordinator_unavailable};
+process_command([Server | Servers], {CmdName, _} = Cmd) ->
+ case ra:process_command(Server, Cmd, ?CMD_TIMEOUT) of
+ {timeout, _} ->
+ rabbit_log:warning("Coordinator timeout on server ~p when processing command ~p",
+ [Server, CmdName]),
+ process_command(Servers, Cmd);
+ {error, noproc} ->
+ process_command(Servers, Cmd);
+ Reply ->
+ Reply
+ end.
+
+%% Make sure a coordinator member is running locally (restarting or
+%% bootstrapping a fresh cluster as needed) and return the server ids
+%% a command may be sent to.
+ensure_coordinator_started() ->
+ Local = {?MODULE, node()},
+ AllNodes = all_nodes(),
+ case ra:restart_server(Local) of
+ {error, Reason} when Reason == not_started orelse
+ Reason == name_not_registered ->
+ OtherNodes = all_nodes() -- [Local],
+ %% We can't use find_members/0 here as a process that timeouts means the cluster is up
+ %% NOTE(review): this checks global:whereis_name/1 on
+ %% {?MODULE, Node} ids — confirm coordinator servers are
+ %% actually registered globally under those names.
+ case lists:filter(fun(N) -> global:whereis_name(N) =/= undefined end, OtherNodes) of
+ [] ->
+ start_coordinator_cluster();
+ _ ->
+ OtherNodes
+ end;
+ ok ->
+ AllNodes;
+ {error, {already_started, _}} ->
+ AllNodes;
+ _ ->
+ AllNodes
+ end.
+
+%% Bootstrap a brand-new coordinator cluster across all running nodes;
+%% returns the started members, or [] when the cluster failed to form.
+start_coordinator_cluster() ->
+ Nodes = rabbit_mnesia:cluster_nodes(running),
+ case ra:start_cluster([make_ra_conf(Node, Nodes) || Node <- Nodes]) of
+ {ok, Started, _} ->
+ Started;
+ {error, cluster_not_formed} ->
+ rabbit_log:warning("Stream coordinator cluster not formed", []),
+ []
+ end.
+
+%% Server ids for all running cluster nodes, local node first.
+all_nodes() ->
+ Nodes = rabbit_mnesia:cluster_nodes(running) -- [node()],
+ [{?MODULE, Node} || Node <- [node() | Nodes]].
+
+%% ra_machine callback: initial empty state (no streams, no monitors).
+init(_Conf) ->
+ #?MODULE{streams = #{},
+ monitors = #{}}.
+
+apply(#{from := From}, {policy_changed, #{stream_id := StreamId}} = Cmd,
+ #?MODULE{streams = Streams0} = State) ->
+ case maps:get(StreamId, Streams0, undefined) of
+ undefined ->
+ {State, ok, []};
+ #{conf := Conf,
+ state := running} ->
+ case rabbit_stream_queue:update_stream_conf(Conf) of
+ Conf ->
+ %% No changes, ensure we only trigger an election if it's a must
+ {State, ok, []};
+ _ ->
+ {State, ok, [{mod_call, osiris_writer, stop, [Conf]}]}
+ end;
+ SState0 ->
+ Streams = maps:put(StreamId, add_pending_cmd(From, Cmd, SState0), Streams0),
+ {State#?MODULE{streams = Streams}, '$ra_no_reply', []}
+
+ end;
+apply(#{from := From}, {start_cluster, #{queue := Q}}, #?MODULE{streams = Streams} = State) ->
+ #{name := StreamId} = Conf0 = amqqueue:get_type_state(Q),
+ Conf = apply_leader_locator_strategy(Conf0, Streams),
+ case maps:is_key(StreamId, Streams) of
+ true ->
+ {State, '$ra_no_reply', wrap_reply(From, {error, already_started})};
+ false ->
+ Phase = phase_start_cluster,
+ PhaseArgs = [amqqueue:set_type_state(Q, Conf)],
+ SState = #{state => start_cluster,
+ phase => Phase,
+ phase_args => PhaseArgs,
+ conf => Conf,
+ reply_to => From,
+ pending_cmds => [],
+ pending_replicas => []},
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering phase_start_cluster", [StreamId]),
+ {State#?MODULE{streams = maps:put(StreamId, SState, Streams)}, '$ra_no_reply',
+ [{aux, {phase, StreamId, Phase, PhaseArgs}}]}
+ end;
+apply(_Meta, {start_cluster_reply, Q}, #?MODULE{streams = Streams,
+ monitors = Monitors0} = State) ->
+ #{name := StreamId,
+ leader_pid := LeaderPid,
+ replica_pids := ReplicaPids} = Conf = amqqueue:get_type_state(Q),
+ SState0 = maps:get(StreamId, Streams),
+ Phase = phase_repair_mnesia,
+ PhaseArgs = [new, Q],
+ SState = SState0#{conf => Conf,
+ phase => Phase,
+ phase_args => PhaseArgs},
+ Monitors = lists:foldl(fun(Pid, M) ->
+ maps:put(Pid, {StreamId, follower}, M)
+ end, maps:put(LeaderPid, {StreamId, leader}, Monitors0), ReplicaPids),
+ MonitorActions = [{monitor, process, Pid} || Pid <- ReplicaPids ++ [LeaderPid]],
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p "
+ "after start_cluster_reply", [StreamId, Phase]),
+ {State#?MODULE{streams = maps:put(StreamId, SState, Streams),
+ monitors = Monitors}, ok,
+ MonitorActions ++ [{aux, {phase, StreamId, Phase, PhaseArgs}}]};
+apply(_Meta, {start_replica_failed, StreamId, Node, Retries, Reply},
+ #?MODULE{streams = Streams0} = State) ->
+ rabbit_log:debug("rabbit_stream_coordinator: ~p start replica failed", [StreamId]),
+ case maps:get(StreamId, Streams0, undefined) of
+ undefined ->
+ {State, {error, not_found}, []};
+ #{pending_replicas := Pending,
+ reply_to := From} = SState ->
+ Streams = Streams0#{StreamId => clear_stream_state(SState#{pending_replicas =>
+ add_unique(Node, Pending)})},
+ reply_and_run_pending(
+ From, StreamId, ok, Reply,
+ [{timer, {pipeline,
+ [{start_replica, #{stream_id => StreamId,
+ node => Node,
+ from => undefined,
+ retries => Retries + 1}}]},
+ ?RESTART_TIMEOUT * Retries}],
+ State#?MODULE{streams = Streams})
+ end;
+apply(_Meta, {phase_finished, StreamId, Reply}, #?MODULE{streams = Streams0} = State) ->
+ rabbit_log:debug("rabbit_stream_coordinator: ~p phase finished", [StreamId]),
+ case maps:get(StreamId, Streams0, undefined) of
+ undefined ->
+ {State, {error, not_found}, []};
+ #{reply_to := From} = SState ->
+ Streams = Streams0#{StreamId => clear_stream_state(SState)},
+ reply_and_run_pending(From, StreamId, ok, Reply, [], State#?MODULE{streams = Streams})
+ end;
+apply(#{from := From}, {start_replica, #{stream_id := StreamId, node := Node,
+ retries := Retries}} = Cmd,
+ #?MODULE{streams = Streams0} = State) ->
+ case maps:get(StreamId, Streams0, undefined) of
+ undefined ->
+ case From of
+ undefined ->
+ {State, ok, []};
+ _ ->
+ {State, '$ra_no_reply', wrap_reply(From, {error, not_found})}
+ end;
+ #{conf := Conf,
+ state := running} = SState0 ->
+ Phase = phase_start_replica,
+ PhaseArgs = [Node, Conf, Retries],
+ SState = update_stream_state(From, start_replica, Phase, PhaseArgs, SState0),
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p on node ~p",
+ [StreamId, Phase, Node]),
+ {State#?MODULE{streams = Streams0#{StreamId => SState}}, '$ra_no_reply',
+ [{aux, {phase, StreamId, Phase, PhaseArgs}}]};
+ SState0 ->
+ Streams = maps:put(StreamId, add_pending_cmd(From, Cmd, SState0), Streams0),
+ {State#?MODULE{streams = Streams}, '$ra_no_reply', []}
+ end;
+apply(_Meta, {start_replica_reply, StreamId, Pid},
+ #?MODULE{streams = Streams, monitors = Monitors0} = State) ->
+ case maps:get(StreamId, Streams, undefined) of
+ undefined ->
+ {State, {error, not_found}, []};
+ #{conf := Conf0} = SState0 ->
+ #{replica_nodes := Replicas0,
+ replica_pids := ReplicaPids0} = Conf0,
+ {ReplicaPids, MaybePid} = delete_replica_pid(node(Pid), ReplicaPids0),
+ Conf = Conf0#{replica_pids => [Pid | ReplicaPids],
+ replica_nodes => add_unique(node(Pid), Replicas0)},
+ Phase = phase_repair_mnesia,
+ PhaseArgs = [update, Conf],
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p after start replica", [StreamId, Phase]),
+ #{pending_replicas := Pending} = SState0 = maps:get(StreamId, Streams),
+ SState = SState0#{conf => Conf,
+ phase => Phase,
+ phase_args => PhaseArgs,
+ pending_replicas => lists:delete(node(Pid), Pending)},
+ Monitors1 = Monitors0#{Pid => {StreamId, follower}},
+ Monitors = case MaybePid of
+ [P] -> maps:remove(P, Monitors1);
+ _ -> Monitors1
+ end,
+ {State#?MODULE{streams = Streams#{StreamId => SState},
+ monitors = Monitors}, ok,
+ [{monitor, process, Pid}, {aux, {phase, StreamId, Phase, PhaseArgs}}]}
+ end;
+apply(#{from := From}, {delete_replica, #{stream_id := StreamId, node := Node}} = Cmd,
+ #?MODULE{streams = Streams0,
+ monitors = Monitors0} = State) ->
+ case maps:get(StreamId, Streams0, undefined) of
+ undefined ->
+ {State, '$ra_no_reply', wrap_reply(From, {error, not_found})};
+ #{conf := Conf0,
+ state := running,
+ pending_replicas := Pending0} = SState0 ->
+ Replicas0 = maps:get(replica_nodes, Conf0),
+ ReplicaPids0 = maps:get(replica_pids, Conf0),
+ case lists:member(Node, Replicas0) of
+ false ->
+ reply_and_run_pending(From, StreamId, '$ra_no_reply', ok, [], State);
+ true ->
+ [Pid] = lists:filter(fun(P) -> node(P) == Node end, ReplicaPids0),
+ ReplicaPids = lists:delete(Pid, ReplicaPids0),
+ Replicas = lists:delete(Node, Replicas0),
+ Pending = lists:delete(Node, Pending0),
+ Conf = Conf0#{replica_pids => ReplicaPids,
+ replica_nodes => Replicas},
+ Phase = phase_delete_replica,
+ PhaseArgs = [Node, Conf],
+ SState = update_stream_state(From, delete_replica,
+ Phase, PhaseArgs,
+ SState0#{conf => Conf0,
+ pending_replicas => Pending}),
+ Monitors = maps:remove(Pid, Monitors0),
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p on node ~p", [StreamId, Phase, Node]),
+ {State#?MODULE{monitors = Monitors,
+ streams = Streams0#{StreamId => SState}},
+ '$ra_no_reply',
+ [{demonitor, process, Pid},
+ {aux, {phase, StreamId, Phase, PhaseArgs}}]}
+ end;
+ SState0 ->
+ Streams = maps:put(StreamId, add_pending_cmd(From, Cmd, SState0), Streams0),
+ {State#?MODULE{streams = Streams}, '$ra_no_reply', []}
+ end;
+apply(#{from := From}, {delete_cluster, #{stream_id := StreamId,
+ acting_user := ActingUser}} = Cmd,
+ #?MODULE{streams = Streams0, monitors = Monitors0} = State) ->
+ case maps:get(StreamId, Streams0, undefined) of
+ undefined ->
+ {State, '$ra_no_reply', wrap_reply(From, {ok, 0})};
+ #{conf := Conf,
+ state := running} = SState0 ->
+ ReplicaPids = maps:get(replica_pids, Conf),
+ LeaderPid = maps:get(leader_pid, Conf),
+ Monitors = lists:foldl(fun(Pid, M) ->
+ maps:remove(Pid, M)
+ end, Monitors0, ReplicaPids ++ [LeaderPid]),
+ Phase = phase_delete_cluster,
+ PhaseArgs = [Conf, ActingUser],
+ SState = update_stream_state(From, delete_cluster, Phase, PhaseArgs, SState0),
+ Demonitors = [{demonitor, process, Pid} || Pid <- [LeaderPid | ReplicaPids]],
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p",
+ [StreamId, Phase]),
+ {State#?MODULE{monitors = Monitors,
+ streams = Streams0#{StreamId => SState}}, '$ra_no_reply',
+ Demonitors ++ [{aux, {phase, StreamId, Phase, PhaseArgs}}]};
+ SState0 ->
+ Streams = maps:put(StreamId, add_pending_cmd(From, Cmd, SState0), Streams0),
+ {State#?MODULE{streams = Streams}, '$ra_no_reply', []}
+ end;
%% Final step of cluster deletion: drop the stream from the machine state,
%% resubmit any commands that were queued behind the deletion, and reply
%% {ok, 0} (zero messages purged) to the original caller.
apply(_Meta, {delete_cluster_reply, StreamId}, #?MODULE{streams = Streams} = State0) ->
    #{reply_to := From,
      pending_cmds := Pending} = maps:get(StreamId, Streams),
    State = State0#?MODULE{streams = maps:remove(StreamId, Streams)},
    rabbit_log:debug("rabbit_stream_coordinator: ~p finished delete_cluster_reply",
                     [StreamId]),
    %% Fix: use the {mod_call, M, F, A} ra effect, as the other pipeline
    %% sites in this module do (timeout clause, reply_and_run_pending/6);
    %% a bare {ra, pipeline_command, Args} 3-tuple is not a recognised ra
    %% effect, so the pending commands would never be resubmitted.
    Actions = [{mod_call, ra, pipeline_command, [{?MODULE, node()}, Cmd]} || Cmd <- Pending],
    {State, ok, Actions ++ wrap_reply(From, {ok, 0})};
+apply(_Meta, {down, Pid, _Reason} = Cmd, #?MODULE{streams = Streams,
+ monitors = Monitors0} = State) ->
+ case maps:get(Pid, Monitors0, undefined) of
+ {StreamId, Role} ->
+ Monitors = maps:remove(Pid, Monitors0),
+ case maps:get(StreamId, Streams, undefined) of
+ #{state := delete_cluster} ->
+ {State#?MODULE{monitors = Monitors}, ok, []};
+ undefined ->
+ {State#?MODULE{monitors = Monitors}, ok, []};
+ #{state := running,
+ conf := #{replica_pids := Pids} = Conf0,
+ pending_cmds := Pending0} = SState0 ->
+ case Role of
+ leader ->
+ rabbit_log:info("rabbit_stream_coordinator: ~p leader is down, starting election", [StreamId]),
+ Phase = phase_stop_replicas,
+ PhaseArgs = [Conf0],
+ SState = update_stream_state(undefined, leader_election, Phase, PhaseArgs, SState0),
+ Events = [{demonitor, process, P} || P <- Pids],
+ Monitors1 = lists:foldl(fun(P, M) ->
+ maps:remove(P, M)
+ end, Monitors, Pids),
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p", [StreamId, Phase]),
+ {State#?MODULE{monitors = Monitors1,
+ streams = Streams#{StreamId => SState}},
+ ok, Events ++ [{aux, {phase, StreamId, Phase, PhaseArgs}}]};
+ follower ->
+ case rabbit_misc:is_process_alive(maps:get(leader_pid, Conf0)) of
+ true ->
+ Phase = phase_start_replica,
+ PhaseArgs = [node(Pid), Conf0, 1],
+ SState = update_stream_state(undefined,
+ replica_restart,
+ Phase, PhaseArgs,
+ SState0),
+ rabbit_log:debug("rabbit_stream_coordinator: ~p replica on node ~p is down, entering ~p", [StreamId, node(Pid), Phase]),
+ {State#?MODULE{monitors = Monitors,
+ streams = Streams#{StreamId => SState}},
+ ok, [{aux, {phase, StreamId, Phase, PhaseArgs}}]};
+ false ->
+ SState = SState0#{pending_cmds => Pending0 ++ [Cmd]},
+ reply_and_run_pending(undefined, StreamId, ok, ok, [], State#?MODULE{streams = Streams#{StreamId => SState}})
+ end
+ end;
+ #{pending_cmds := Pending0} = SState0 ->
+ SState = SState0#{pending_cmds => Pending0 ++ [Cmd]},
+ {State#?MODULE{streams = Streams#{StreamId => SState}}, ok, []}
+ end;
+ undefined ->
+ {State, ok, []}
+ end;
+apply(_Meta, {start_leader_election, StreamId, NewEpoch, Offsets},
+ #?MODULE{streams = Streams} = State) ->
+ #{conf := Conf0} = SState0 = maps:get(StreamId, Streams),
+ #{leader_node := Leader,
+ replica_nodes := Replicas,
+ replica_pids := ReplicaPids0} = Conf0,
+ NewLeader = find_max_offset(Offsets),
+ rabbit_log:info("rabbit_stream_coordinator: ~p starting new leader on node ~p",
+ [StreamId, NewLeader]),
+ {ReplicaPids, _} = delete_replica_pid(NewLeader, ReplicaPids0),
+ Conf = rabbit_stream_queue:update_stream_conf(
+ Conf0#{epoch => NewEpoch,
+ leader_node => NewLeader,
+ replica_nodes => lists:delete(NewLeader, Replicas ++ [Leader]),
+ replica_pids => ReplicaPids}),
+ Phase = phase_start_new_leader,
+ PhaseArgs = [Conf],
+ SState = SState0#{conf => Conf,
+ phase => Phase,
+ phase_args => PhaseArgs},
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering phase_start_new_leader",
+ [StreamId]),
+ {State#?MODULE{streams = Streams#{StreamId => SState}}, ok,
+ [{aux, {phase, StreamId, Phase, PhaseArgs}}]};
+apply(_Meta, {leader_elected, StreamId, NewLeaderPid},
+ #?MODULE{streams = Streams, monitors = Monitors0} = State) ->
+ rabbit_log:info("rabbit_stream_coordinator: ~p leader elected", [StreamId]),
+ #{conf := Conf0,
+ pending_cmds := Pending0} = SState0 = maps:get(StreamId, Streams),
+ #{leader_pid := LeaderPid,
+ replica_nodes := Replicas} = Conf0,
+ Conf = Conf0#{leader_pid => NewLeaderPid},
+ Phase = phase_repair_mnesia,
+ PhaseArgs = [update, Conf],
+ Pending = Pending0 ++ [{start_replica, #{stream_id => StreamId, node => R,
+ retries => 1, from => undefined}}
+ || R <- Replicas],
+ SState = SState0#{conf => Conf,
+ phase => Phase,
+ phase_args => PhaseArgs,
+ pending_replicas => Replicas,
+ pending_cmds => Pending},
+ Monitors = maps:put(NewLeaderPid, {StreamId, leader}, maps:remove(LeaderPid, Monitors0)),
+ rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p after "
+ "leader election", [StreamId, Phase]),
+ {State#?MODULE{streams = Streams#{StreamId => SState},
+ monitors = Monitors}, ok,
+ [{monitor, process, NewLeaderPid},
+ {aux, {phase, StreamId, Phase, PhaseArgs}}]};
%% All replicas of StreamId have been stopped (leader election step 1 done).
%% Clear the recorded replica pids and move on to checking whether enough
%% members are reachable to elect a new leader.
apply(_Meta, {replicas_stopped, StreamId}, #?MODULE{streams = Streams} = State) ->
    case maps:get(StreamId, Streams, undefined) of
        undefined ->
            {State, {error, not_found}, []};
        #{conf := Conf0} = SState0 ->
            Phase = phase_check_quorum,
            %% All replica processes are down now, so none are tracked.
            Conf = Conf0#{replica_pids => []},
            PhaseArgs = [Conf],
            SState = SState0#{conf => Conf,
                              phase => Phase,
                              phase_args => PhaseArgs},
            rabbit_log:info("rabbit_stream_coordinator: ~p all replicas have been stopped, "
                            "checking quorum available", [StreamId]),
            {State#?MODULE{streams = Streams#{StreamId => SState}}, ok,
             [{aux, {phase, StreamId, Phase, PhaseArgs}}]}
    end;
%% A phase produced an updated stream configuration (e.g. after a replica
%% was deleted). Store it and repair the mnesia queue record to match.
apply(_Meta, {stream_updated, #{name := StreamId} = Conf, }, #?MODULE{streams = Streams} = State) ->
    SState0 = maps:get(StreamId, Streams),
    Phase = phase_repair_mnesia,
    PhaseArgs = [update, Conf],
    SState = SState0#{conf => Conf,
                      phase => Phase,
                      phase_args => PhaseArgs},
    rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p after"
                     " stream_updated", [StreamId, Phase]),
    {State#?MODULE{streams = Streams#{StreamId => SState}}, ok,
     [{aux, {phase, StreamId, Phase, PhaseArgs}}]};
%% Timer-driven resubmission of commands that could not run earlier.
apply(_, {timeout, {pipeline, Cmds}}, State) ->
    Actions = [{mod_call, ra, pipeline_command, [{?MODULE, node()}, Cmd]} || Cmd <- Cmds],
    {State, ok, Actions};
%% Timer-driven retry of an aux (phase) command.
apply(_, {timeout, {aux, Cmd}}, State) ->
    {State, ok, [{aux, Cmd}]};
%% Catch-all: the command payload carries its own 'from' (a command that was
%% parked and later re-pipelined). Lift it into the metadata and
%% re-dispatch so the #{from := From} clauses above can match.
apply(Meta, {_, #{from := From}} = Cmd, State) ->
    ?MODULE:apply(Meta#{from => From}, Cmd, State).
+
%% ra state_enter callback. On becoming leader: re-establish a monitor on
%% every known member pid, restart any phase that was in flight when the
%% previous leader stopped, and schedule resubmission of pending replica
%% restarts. Followers only re-establish monitors. 'recover' tags the
%% process dictionary for VM-stats categorisation.
state_enter(leader, #?MODULE{streams = Streams, monitors = Monitors}) ->
    maps:fold(fun(_, #{conf := #{name := StreamId},
                       pending_replicas := Pending,
                       state := State,
                       phase := Phase,
                       phase_args := PhaseArgs}, Acc) ->
                      restart_aux_phase(State, Phase, PhaseArgs, StreamId) ++
                          pipeline_restart_replica_cmds(StreamId, Pending) ++
                          Acc
              end, [{monitor, process, P} || P <- maps:keys(Monitors)], Streams);
state_enter(follower, #?MODULE{monitors = Monitors}) ->
    [{monitor, process, P} || P <- maps:keys(Monitors)];
state_enter(recover, _) ->
    put('$rabbit_vm_category', ?MODULE),
    [];
state_enter(_, _) ->
    [].
+
%% Effect(s) needed to resume a stream that was mid-phase when leadership
%% changed. A stream in state 'running' has no phase in flight, so no
%% effect is required.
restart_aux_phase(StreamState, Phase, PhaseArgs, StreamId) ->
    case StreamState of
        running ->
            [];
        _ ->
            [{aux, {phase, StreamId, Phase, PhaseArgs}}]
    end.
+
%% Timer effect that, after ?RESTART_TIMEOUT, resubmits a start_replica
%% command for every node still pending for StreamId. Used on leader state
%% entry to resume replica restarts interrupted by the leadership change.
pipeline_restart_replica_cmds(StreamId, Pending) ->
    [{timer, {pipeline, [{start_replica, #{stream_id => StreamId,
                                           node => Node,
                                           from => undefined,
                                           retries => 1}}
                         || Node <- Pending]}, ?RESTART_TIMEOUT}].
+
%% ra tick callback: periodically ask the aux machine (leader only) to
%% check whether the coordinator cluster membership needs resizing.
tick(_Ts, _State) ->
    [{aux, maybe_resize_coordinator_cluster}].
+
+maybe_resize_coordinator_cluster() ->
+ spawn(fun() ->
+ case ra:members({?MODULE, node()}) of
+ {_, Members, _} ->
+ MemberNodes = [Node || {_, Node} <- Members],
+ Running = rabbit_mnesia:cluster_nodes(running),
+ All = rabbit_mnesia:cluster_nodes(all),
+ case Running -- MemberNodes of
+ [] ->
+ ok;
+ New ->
+ rabbit_log:warning("New rabbit node(s) detected, "
+ "adding stream coordinator in: ~p", [New]),
+ add_members(Members, New)
+ end,
+ case MemberNodes -- All of
+ [] ->
+ ok;
+ Old ->
+ rabbit_log:warning("Rabbit node(s) removed from the cluster, "
+ "deleting stream coordinator in: ~p", [Old]),
+ remove_members(Members, Old)
+ end;
+ _ ->
+ ok
+ end
+ end).
+
+add_members(_, []) ->
+ ok;
+add_members(Members, [Node | Nodes]) ->
+ Conf = make_ra_conf(Node, [N || {_, N} <- Members]),
+ case ra:start_server(Conf) of
+ ok ->
+ case ra:add_member(Members, {?MODULE, Node}) of
+ {ok, NewMembers, _} ->
+ add_members(NewMembers, Nodes);
+ _ ->
+ add_members(Members, Nodes)
+ end;
+ Error ->
+ rabbit_log:warning("Stream coordinator failed to start on node ~p : ~p",
+ [Node, Error]),
+ add_members(Members, Nodes)
+ end.
+
%% Remove the coordinator member on each of Nodes from the ra cluster,
%% threading the updated membership through each step. Failures are
%% silently skipped (best effort); the next tick retries.
remove_members(_, []) ->
    ok;
remove_members(Members, [Node | Nodes]) ->
    case ra:remove_member(Members, {?MODULE, Node}) of
        {ok, NewMembers, _} ->
            remove_members(NewMembers, Nodes);
        _ ->
            remove_members(Members, Nodes)
    end.
+
%% Aux machine state: {PhaseMonitors, ResizerPid}. PhaseMonitors maps each
%% spawned phase pid to the command that started it (for retry on failure);
%% ResizerPid is the in-flight cluster-resize worker, or undefined.
init_aux(_Name) ->
    {#{}, undefined}.
+
%% TODO ensure the dead writer is restarted as a replica at some point in time, increasing timeout?
%% Aux machine (leader only): runs phases as spawned, monitored processes
%% and owns the single cluster-resize worker.
handle_aux(leader, _, maybe_resize_coordinator_cluster, {Monitors, undefined}, LogState, _) ->
    Pid = maybe_resize_coordinator_cluster(),
    {no_reply, {Monitors, Pid}, LogState, [{monitor, process, aux, Pid}]};
handle_aux(leader, _, maybe_resize_coordinator_cluster, AuxState, LogState, _) ->
    %% Coordinator resizing is still happening, let's ignore this tick event
    {no_reply, AuxState, LogState};
handle_aux(leader, _, {down, Pid, _}, {Monitors, Pid}, LogState, _) ->
    %% Coordinator resizing has finished
    {no_reply, {Monitors, undefined}, LogState};
%% Start a phase: spawn it and remember the command so it can be retried if
%% the process dies abnormally.
handle_aux(leader, _, {phase, _, Fun, Args} = Cmd, {Monitors, Coordinator}, LogState, _) ->
    Pid = erlang:apply(?MODULE, Fun, Args),
    Actions = [{monitor, process, aux, Pid}],
    {no_reply, {maps:put(Pid, Cmd, Monitors), Coordinator}, LogState, Actions};
%% Phase completed normally: just forget its pid.
handle_aux(leader, _, {down, Pid, normal}, {Monitors, Coordinator}, LogState, _) ->
    {no_reply, {maps:remove(Pid, Monitors), Coordinator}, LogState};
handle_aux(leader, _, {down, Pid, Reason}, {Monitors0, Coordinator}, LogState, _) ->
    %% The phase has failed, let's retry it
    %% NOTE(review): maps:get/2 crashes if Pid is not a tracked phase pid —
    %% confirm a non-normal 'down' can only arrive for monitored phases.
    case maps:get(Pid, Monitors0) of
        {phase, StreamId, phase_start_new_leader, Args} ->
            rabbit_log:warning("Error while starting new leader for stream queue ~p, "
                               "restarting election: ~p", [StreamId, Reason]),
            Monitors = maps:remove(Pid, Monitors0),
            %% Re-run the quorum check rather than retrying the start
            %% directly: cluster conditions may have changed meanwhile.
            Cmd = {phase, StreamId, phase_check_quorum, Args},
            {no_reply, {Monitors, Coordinator}, LogState, [{timer, {aux, Cmd}, ?PHASE_RETRY_TIMEOUT}]};
        {phase, StreamId, Fun, _} = Cmd ->
            rabbit_log:warning("Error while executing coordinator phase ~p for stream queue ~p ~p",
                               [Fun, StreamId, Reason]),
            Monitors = maps:remove(Pid, Monitors0),
            {no_reply, {Monitors, Coordinator}, LogState, [{timer, {aux, Cmd}, ?PHASE_RETRY_TIMEOUT}]}
    end;
handle_aux(_, _, _, AuxState, LogState, _) ->
    {no_reply, AuxState, LogState}.
+
%% Reply to From (when set), flush the stream's queued commands back into
%% the ra pipeline, and clear the pending queue. Reply is the synchronous
%% return value of apply/3; WrapReply is the value delivered asynchronously
%% via wrap_reply/2 when From is not undefined.
reply_and_run_pending(From, StreamId, Reply, WrapReply, Actions0, #?MODULE{streams = Streams} = State) ->
    #{pending_cmds := Pending} = SState0 = maps:get(StreamId, Streams),
    AuxActions = [{mod_call, ra, pipeline_command, [{?MODULE, node()}, Cmd]}
                  || Cmd <- Pending],
    SState = maps:put(pending_cmds, [], SState0),
    Actions = case From of
                  undefined ->
                      AuxActions ++ Actions0;
                  _ ->
                      wrap_reply(From, WrapReply) ++ AuxActions ++ Actions0
              end,
    {State#?MODULE{streams = Streams#{StreamId => SState}}, Reply, Actions}.
+
%% Build the ra reply effect that delivers Reply to From, tagged with
%% wrap_reply so the client side can distinguish coordinator replies.
wrap_reply(From, Reply) ->
    Tagged = {wrap_reply, Reply},
    [{reply, From, Tagged}].
+
%% Append Cmd (with From recorded in its payload) to the stream's pending
%% queue, to be re-pipelined once the current phase finishes.
%% When the incoming command is delete_cluster, queued 'down' events
%% (leader elections / replica restarts) are dropped first — they would be
%% pointless against a cluster that is about to be deleted.
add_pending_cmd(From, {CmdName, CmdMap}, #{pending_cmds := Pending0} = StreamState) ->
    KeepFilter = fun({down, _, _}) -> false;
                    (_) -> true
                 end,
    Pending1 = case CmdName of
                   delete_cluster -> lists:filter(KeepFilter, Pending0);
                   _ -> Pending0
               end,
    Entry = {CmdName, CmdMap#{from => From}},
    StreamState#{pending_cmds => Pending1 ++ [Entry]}.
+
%% Reset per-stream bookkeeping after a phase completes: back to 'running'
%% with no pending reply and no phase in flight.
clear_stream_state(StreamState) ->
    Reset = #{reply_to => undefined,
              state => running,
              phase => undefined,
              phase_args => undefined},
    maps:merge(StreamState, Reset).
+
%% Record that the stream has entered Phase (triggered by command State,
%% e.g. delete_replica or leader_election) and who to reply to on finish.
update_stream_state(From, State, Phase, PhaseArgs, StreamState) ->
    maps:merge(StreamState,
               #{reply_to => From,
                 state => State,
                 phase => Phase,
                 phase_args => PhaseArgs}).
+
+phase_start_replica(Node, #{name := StreamId} = Conf0,
+ Retries) ->
+ spawn(
+ fun() ->
+ %% If a new leader hasn't yet been elected, this will fail with a badmatch
+ %% as get_reader_context returns a no proc. An unhandled failure will
+ %% crash this monitored process and restart it later.
+ %% TODO However, do we want that crash in the log? We might need to try/catch
+ %% to provide a log message instead as it's 'expected'. We could try to
+ %% verify first that the leader is alive, but there would still be potential
+ %% for a race condition in here.
+ try
+ case osiris_replica:start(Node, Conf0) of
+ {ok, Pid} ->
+ ra:pipeline_command({?MODULE, node()},
+ {start_replica_reply, StreamId, Pid});
+ {error, already_present} ->
+ ra:pipeline_command({?MODULE, node()}, {phase_finished, StreamId, ok});
+ {error, {already_started, _}} ->
+ ra:pipeline_command({?MODULE, node()}, {phase_finished, StreamId, ok});
+ {error, Reason} = Error ->
+ rabbit_log:warning("Error while starting replica for ~p : ~p",
+ [maps:get(name, Conf0), Reason]),
+ ra:pipeline_command({?MODULE, node()},
+ {start_replica_failed, StreamId, Node, Retries, Error})
+ end
+ catch _:E->
+ rabbit_log:warning("Error while starting replica for ~p : ~p",
+ [maps:get(name, Conf0), E]),
+ ra:pipeline_command({?MODULE, node()},
+ {start_replica_failed, StreamId, Node, Retries, {error, E}})
+ end
+ end).
+
%% Spawn a worker that deletes the stream replica on Node and then reports
%% the trimmed configuration back to the coordinator (stream_updated). The
%% pid is monitored by the aux machine; a crash triggers a timed retry.
phase_delete_replica(Node, Conf) ->
    spawn(
      fun() ->
              ok = osiris_replica:delete(Node, Conf),
              ra:pipeline_command({?MODULE, node()}, {stream_updated, Conf})
      end).
+
%% Spawn a worker that stops every replica of the stream (first step of a
%% leader election) and then notifies the coordinator via replicas_stopped.
phase_stop_replicas(#{replica_nodes := Replicas,
                      name := StreamId} = Conf) ->
    spawn(
      fun() ->
              [try
                   osiris_replica:stop(Node, Conf)
               catch _:{{nodedown, _}, _} ->
                       %% It could be the old leader that is still down, it's normal.
                       ok
               end || Node <- Replicas],
              ra:pipeline_command({?MODULE, node()}, {replicas_stopped, StreamId})
      end).
+
+phase_start_new_leader(#{name := StreamId, leader_node := Node, leader_pid := LPid} = Conf) ->
+ spawn(fun() ->
+ osiris_replica:stop(Node, Conf),
+ %% If the start fails, the monitor will capture the crash and restart it
+ case osiris_writer:start(Conf) of
+ {ok, Pid} ->
+ ra:pipeline_command({?MODULE, node()},
+ {leader_elected, StreamId, Pid});
+ {error, already_present} ->
+ ra:pipeline_command({?MODULE, node()},
+ {leader_elected, StreamId, LPid});
+ {error, {already_started, Pid}} ->
+ ra:pipeline_command({?MODULE, node()},
+ {leader_elected, StreamId, Pid})
+ end
+ end).
+
%% Spawn a worker that gathers log offsets from all reachable members and,
%% if a majority responded, starts the leader election for the next epoch.
%% Without quorum the worker exits abnormally so the aux monitor retries
%% the phase after ?PHASE_RETRY_TIMEOUT.
phase_check_quorum(#{name := StreamId,
                     epoch := Epoch,
                     replica_nodes := Nodes} = Conf) ->
    spawn(fun() ->
                  Offsets = find_replica_offsets(Conf),
                  %% +1 accounts for the leader node alongside the replicas.
                  case is_quorum(length(Nodes) + 1, length(Offsets)) of
                      true ->
                          ra:pipeline_command({?MODULE, node()},
                                              {start_leader_election, StreamId, Epoch + 1, Offsets});
                      false ->
                          %% Let's crash this process so the monitor will restart it
                          exit({not_enough_quorum, StreamId})
                  end
          end).
+
+find_replica_offsets(#{replica_nodes := Nodes,
+ leader_node := Leader} = Conf) ->
+ lists:foldl(
+ fun(Node, Acc) ->
+ try
+ %% osiris_log:overview/1 needs the directory - last item of the list
+ case rpc:call(Node, rabbit, is_running, []) of
+ false ->
+ Acc;
+ true ->
+ case rpc:call(Node, ?MODULE, log_overview, [Conf]) of
+ {badrpc, nodedown} ->
+ Acc;
+ {_Range, Offsets} ->
+ [{Node, select_highest_offset(Offsets)} | Acc]
+ end
+ end
+ catch
+ _:_ ->
+ Acc
+ end
+ end, [], Nodes ++ [Leader]).
+
%% The offsets list is ordered ascending, so the replica's most recent
%% {Offset, Epoch} entry is the final element; an empty log yields 'empty'.
select_highest_offset([]) ->
    empty;
select_highest_offset([_ | _] = Offsets) ->
    hd(lists:reverse(Offsets)).
+
%% Executed on a stream member node (via rpc from find_replica_offsets/1):
%% returns the osiris log overview for the stream's local data directory.
log_overview(Config) ->
    Dir = osiris_log:directory(Config),
    osiris_log:overview(Dir).
+
%% Pick the node whose replica log is the most up to date: highest epoch
%% wins, ties broken by highest offset; nodes whose log is 'empty' sort
%% last. NOTE(review): assumes Offsets is non-empty — an empty list would
%% crash on the head match; callers only get here with quorum responses.
find_max_offset(Offsets) ->
    MoreRecent = fun({_, {OffA, Epoch}}, {_, {OffB, Epoch}}) ->
                         %% Same epoch: compare offsets.
                         OffA >= OffB;
                    ({_, {_, EpochA}}, {_, {_, EpochB}}) ->
                         EpochA >= EpochB;
                    ({_, empty}, _) ->
                         false;
                    (_, {_, empty}) ->
                         true
                 end,
    [{Winner, _} | _] = lists:sort(MoreRecent, Offsets),
    Winner.
+
%% True when NumAlive members form a strict majority of NumReplicas; a
%% single-member cluster is its own quorum.
is_quorum(1, 1) ->
    true;
is_quorum(NumReplicas, NumAlive) ->
    Majority = (NumReplicas div 2) + 1,
    NumAlive >= Majority.
+
+phase_repair_mnesia(new, Q) ->
+ spawn(fun() ->
+ Reply = rabbit_amqqueue:internal_declare(Q, false),
+ #{name := StreamId} = amqqueue:get_type_state(Q),
+ ra:pipeline_command({?MODULE, node()}, {phase_finished, StreamId, Reply})
+ end);
+
+phase_repair_mnesia(update, #{reference := QName,
+ leader_pid := LeaderPid,
+ name := StreamId} = Conf) ->
+ Fun = fun (Q) ->
+ amqqueue:set_type_state(amqqueue:set_pid(Q, LeaderPid), Conf)
+ end,
+ spawn(fun() ->
+ case rabbit_misc:execute_mnesia_transaction(
+ fun() ->
+ rabbit_amqqueue:update(QName, Fun)
+ end) of
+ not_found ->
+ %% This can happen during recovery
+ [Q] = mnesia:dirty_read(rabbit_durable_queue, QName),
+ rabbit_amqqueue:ensure_rabbit_queue_record_is_initialized(Fun(Q));
+ _ ->
+ ok
+ end,
+ ra:pipeline_command({?MODULE, node()}, {phase_finished, StreamId, ok})
+ end).
+
+phase_start_cluster(Q0) ->
+ spawn(
+ fun() ->
+ case osiris:start_cluster(amqqueue:get_type_state(Q0)) of
+ {ok, #{leader_pid := Pid} = Conf} ->
+ Q = amqqueue:set_type_state(amqqueue:set_pid(Q0, Pid), Conf),
+ ra:pipeline_command({?MODULE, node()}, {start_cluster_reply, Q});
+ {error, {already_started, _}} ->
+ ra:pipeline_command({?MODULE, node()}, {start_cluster_finished, {error, already_started}})
+ end
+ end).
+
%% Spawn a worker that tears down the osiris cluster, removes the queue
%% record, and acknowledges completion back to the coordinator.
phase_delete_cluster(#{name := StreamId,
                       reference := QName} = Conf, ActingUser) ->
    spawn(
      fun() ->
              ok = osiris:delete_cluster(Conf),
              %% Queue record may already be gone; ignore the result.
              _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
              ra:pipeline_command({?MODULE, node()}, {delete_cluster_reply, StreamId})
      end).
+
%% ra event formatter (configured in make_ra_conf/2): wraps raw ra events
%% so receiving processes can pattern match on coordinator events.
format_ra_event(Server, Event) ->
    {stream_coordinator_event, Server, Event}.
+
+make_ra_conf(Node, Nodes) ->
+ UId = ra:new_uid(ra_lib:to_binary(?MODULE)),
+ Formatter = {?MODULE, format_ra_event, []},
+ Members = [{?MODULE, N} || N <- Nodes],
+ TickTimeout = application:get_env(rabbit, stream_tick_interval,
+ ?TICK_TIMEOUT),
+ #{cluster_name => ?MODULE,
+ id => {?MODULE, Node},
+ uid => UId,
+ friendly_name => atom_to_list(?MODULE),
+ metrics_key => ?MODULE,
+ initial_members => Members,
+ log_init_args => #{uid => UId},
+ tick_timeout => TickTimeout,
+ machine => {module, ?MODULE, #{}},
+ ra_event_formatter => Formatter}.
+
%% Prepend Node to Nodes unless it is already a member (set-like cons).
add_unique(Node, Nodes) ->
    case lists:member(Node, Nodes) of
        false -> [Node | Nodes];
        true -> Nodes
    end.
+
%% Split ReplicaPids into {PidsOnOtherNodes, PidsOnNode}: drops the pid(s)
%% hosted on Node while remembering which they were.
delete_replica_pid(Node, ReplicaPids) ->
    OnAnotherNode = fun(Pid) -> node(Pid) =/= Node end,
    lists:partition(OnAnotherNode, ReplicaPids).
+
%% Choose the leader node for a new stream per its leader-locator strategy.
%% "client-local": keep the node the declaring client connected to.
apply_leader_locator_strategy(#{leader_locator_strategy := <<"client-local">>} = Conf, _) ->
    Conf;
%% "random": hash the stream name onto the member list — deterministic per
%% name, but spreads leaders across the cluster overall.
apply_leader_locator_strategy(#{leader_node := Leader,
                                replica_nodes := Replicas0,
                                leader_locator_strategy := <<"random">>,
                                name := StreamId} = Conf, _) ->
    Replicas = [Leader | Replicas0],
    ClusterSize = length(Replicas),
    Hash = erlang:phash2(StreamId),
    Pos = (Hash rem ClusterSize) + 1,
    NewLeader = lists:nth(Pos, Replicas),
    NewReplicas = lists:delete(NewLeader, Replicas),
    Conf#{leader_node => NewLeader,
          replica_nodes => NewReplicas};
%% "least-leaders": count how many existing streams lead on each candidate
%% node and pick the least loaded candidate.
apply_leader_locator_strategy(#{leader_node := Leader,
                                replica_nodes := Replicas0,
                                leader_locator_strategy := <<"least-leaders">>} = Conf,
                              Streams) ->
    Replicas = [Leader | Replicas0],
    Counters0 = maps:from_list([{R, 0} || R <- Replicas]),
    %% NOTE(review): maps:update_with/4 stores Init (0) as-is for keys not
    %% already present, so leaders outside the candidate set are counted 0
    %% rather than 1; harmless here since they are filtered out below, but
    %% worth confirming.
    Counters = maps:to_list(maps:fold(fun(_Key, #{conf := #{leader_node := L}}, Acc) ->
                                              maps:update_with(L, fun(V) -> V + 1 end, 0, Acc)
                                      end, Counters0, Streams)),
    Ordered = lists:sort(fun({_, V1}, {_, V2}) ->
                                 V1 =< V2
                         end, Counters),
    %% We could have potentially introduced nodes that are not in the list of replicas if
    %% initial cluster size is smaller than the cluster size. Let's select the first one
    %% that is on the list of replicas
    NewLeader = select_first_matching_node(Ordered, Replicas),
    NewReplicas = lists:delete(NewLeader, Replicas),
    Conf#{leader_node => NewLeader,
          replica_nodes => NewReplicas}.
+
%% Walk the {Node, LeaderCount} list (pre-sorted by load) and return the
%% first node that is actually one of the stream's replicas.
%% NOTE(review): crashes (function_clause) when no candidate matches —
%% callers guarantee at least one replica appears in the list.
select_first_matching_node([{Candidate, _} | Rest], Replicas) ->
    case lists:member(Candidate, Replicas) of
        true ->
            Candidate;
        false ->
            select_first_matching_node(Rest, Replicas)
    end.
diff --git a/deps/rabbit/src/rabbit_stream_queue.erl b/deps/rabbit/src/rabbit_stream_queue.erl
new file mode 100644
index 0000000000..4e428495b0
--- /dev/null
+++ b/deps/rabbit/src/rabbit_stream_queue.erl
@@ -0,0 +1,734 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at https://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% Copyright (c) 2012-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_stream_queue).
+
+-behaviour(rabbit_queue_type).
+
+-export([is_enabled/0,
+ declare/2,
+ delete/4,
+ purge/1,
+ policy_changed/1,
+ recover/2,
+ is_recoverable/1,
+ consume/3,
+ cancel/5,
+ handle_event/2,
+ deliver/2,
+ settle/4,
+ credit/4,
+ dequeue/4,
+ info/2,
+ init/1,
+ close/1,
+ update/2,
+ state_info/1,
+ stat/1,
+ capabilities/0]).
+
+-export([set_retention_policy/3]).
+-export([add_replica/3,
+ delete_replica/3]).
+-export([format_osiris_event/2]).
+-export([update_stream_conf/1]).
+
+-include("rabbit.hrl").
+-include("amqqueue.hrl").
+
+-define(INFO_KEYS, [name, durable, auto_delete, arguments, leader, members, online, state,
+ messages, messages_ready, messages_unacknowledged, committed_offset,
+ policy, operator_policy, effective_policy_definition, type]).
+
+-type appender_seq() :: non_neg_integer().
+
+-record(stream, {name :: rabbit_types:r('queue'),
+ credit :: integer(),
+ max :: non_neg_integer(),
+ start_offset = 0 :: non_neg_integer(),
+ listening_offset = 0 :: non_neg_integer(),
+ log :: undefined | osiris_log:state()}).
+
+-record(stream_client, {name :: term(),
+ leader :: pid(),
+ next_seq = 1 :: non_neg_integer(),
+ correlation = #{} :: #{appender_seq() => term()},
+ soft_limit :: non_neg_integer(),
+ slow = false :: boolean(),
+ readers = #{} :: #{term() => #stream{}}
+ }).
+
+-import(rabbit_queue_type_util, [args_policy_lookup/3]).
+
+-type client() :: #stream_client{}.
+
-spec is_enabled() -> boolean().
%% Stream queues are gated behind the 'stream_queue' feature flag.
is_enabled() ->
    rabbit_feature_flags:is_enabled(stream_queue).
+
+-spec declare(amqqueue:amqqueue(), node()) ->
+ {'new' | 'existing', amqqueue:amqqueue()} |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+declare(Q0, Node) when ?amqqueue_is_stream(Q0) ->
+ case rabbit_queue_type_util:run_checks(
+ [fun rabbit_queue_type_util:check_auto_delete/1,
+ fun rabbit_queue_type_util:check_exclusive/1,
+ fun rabbit_queue_type_util:check_non_durable/1],
+ Q0) of
+ ok ->
+ start_cluster(Q0, Node);
+ Err ->
+ Err
+ end.
+
+start_cluster(Q0, Node) ->
+ Arguments = amqqueue:get_arguments(Q0),
+ QName = amqqueue:get_name(Q0),
+ Opts = amqqueue:get_options(Q0),
+ ActingUser = maps:get(user, Opts, ?UNKNOWN_USER),
+ Conf0 = make_stream_conf(Node, Q0),
+ case rabbit_stream_coordinator:start_cluster(
+ amqqueue:set_type_state(Q0, Conf0)) of
+ {ok, {error, already_started}, _} ->
+ {protocol_error, precondition_failed, "safe queue name already in use '~s'",
+ [Node]};
+ {ok, {created, Q}, _} ->
+ rabbit_event:notify(queue_created,
+ [{name, QName},
+ {durable, true},
+ {auto_delete, false},
+ {arguments, Arguments},
+ {user_who_performed_action,
+ ActingUser}]),
+ {new, Q};
+ {ok, {error, Error}, _} ->
+ _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
+ {protocol_error, internal_error, "Cannot declare a queue '~s' on node '~s': ~255p",
+ [rabbit_misc:rs(QName), node(), Error]};
+ {ok, {existing, Q}, _} ->
+ {existing, Q};
+ {error, coordinator_unavailable} ->
+ _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
+ {protocol_error, internal_error,
+ "Cannot declare a queue '~s' on node '~s': coordinator unavailable",
+ [rabbit_misc:rs(QName), node()]}
+ end.
+
-spec delete(amqqueue:amqqueue(), boolean(),
             boolean(), rabbit_types:username()) ->
    rabbit_types:ok(non_neg_integer()) |
    rabbit_types:error(in_use | not_empty).
%% Delete the whole stream cluster via the coordinator. The if-unused and
%% if-empty flags are ignored for streams.
delete(Q, _IfUnused, _IfEmpty, ActingUser) ->
    Name = maps:get(name, amqqueue:get_type_state(Q)),
    {ok, Reply, _} = rabbit_stream_coordinator:delete_cluster(Name, ActingUser),
    Reply.
+
-spec purge(amqqueue:amqqueue()) ->
    {ok, non_neg_integer()} | {error, term()}.
%% Streams are append-only logs: there is no notion of purging ready
%% messages, so queue.purge is always rejected.
purge(_Q) ->
    {error, not_supported}.
+
-spec policy_changed(amqqueue:amqqueue()) -> 'ok'.
%% Forward policy changes to the coordinator, which re-evaluates the
%% stream's configuration; the coordinator's reply is deliberately ignored.
policy_changed(Q) ->
    Name = maps:get(name, amqqueue:get_type_state(Q)),
    _ = rabbit_stream_coordinator:policy_changed(Name),
    ok.
+
%% Declare-time stats: message/consumer counts are not tracked through this
%% path for streams, so always report zeros.
stat(_Q) ->
    {ok, 0, 0}.
+
+consume(Q, #{prefetch_count := 0}, _)
+ when ?amqqueue_is_stream(Q) ->
+ {protocol_error, precondition_failed, "consumer prefetch count is not set for '~s'",
+ [rabbit_misc:rs(amqqueue:get_name(Q))]};
+consume(Q, #{no_ack := true}, _)
+ when ?amqqueue_is_stream(Q) ->
+ {protocol_error, not_implemented,
+ "automatic acknowledgement not supported by stream queues ~s",
+ [rabbit_misc:rs(amqqueue:get_name(Q))]};
+consume(Q, #{limiter_active := true}, _State)
+ when ?amqqueue_is_stream(Q) ->
+ {error, global_qos_not_supported_for_queue_type};
+consume(Q, Spec, QState0) when ?amqqueue_is_stream(Q) ->
+ %% Messages should include the offset as a custom header.
+ case check_queue_exists_in_local_node(Q) of
+ ok ->
+ #{no_ack := NoAck,
+ channel_pid := ChPid,
+ prefetch_count := ConsumerPrefetchCount,
+ consumer_tag := ConsumerTag,
+ exclusive_consume := ExclusiveConsume,
+ args := Args,
+ ok_msg := OkMsg} = Spec,
+ QName = amqqueue:get_name(Q),
+ Offset = case rabbit_misc:table_lookup(Args, <<"x-stream-offset">>) of
+ undefined ->
+ next;
+ {_, <<"first">>} ->
+ first;
+ {_, <<"last">>} ->
+ last;
+ {_, <<"next">>} ->
+ next;
+ {timestamp, V} ->
+ {timestamp, V};
+ {_, V} ->
+ V
+ end,
+ rabbit_core_metrics:consumer_created(ChPid, ConsumerTag, ExclusiveConsume,
+ not NoAck, QName,
+ ConsumerPrefetchCount, false,
+ up, Args),
+ %% FIXME: reply needs to be sent before the stream begins sending
+ %% really it should be sent by the stream queue process like classic queues
+ %% do
+ maybe_send_reply(ChPid, OkMsg),
+ QState = begin_stream(QState0, Q, ConsumerTag, Offset,
+ ConsumerPrefetchCount),
+ {ok, QState, []};
+ Err ->
+ Err
+ end.
+
%% Return a member pid of the stream that runs on this node, preferring the
%% leader. Crashes (badmatch) when no member is local — callers verify
%% queue locality first.
get_local_pid(#{leader_pid := Pid}) when node(Pid) == node() ->
    Pid;
get_local_pid(#{replica_pids := ReplicaPids}) ->
    [Local | _] = [P || P <- ReplicaPids, node(P) == node()],
    Local.
+
+begin_stream(#stream_client{readers = Readers0} = State,
+ Q, Tag, Offset, Max) ->
+ LocalPid = get_local_pid(amqqueue:get_type_state(Q)),
+ {ok, Seg0} = osiris:init_reader(LocalPid, Offset),
+ NextOffset = osiris_log:next_offset(Seg0) - 1,
+ osiris:register_offset_listener(LocalPid, NextOffset),
+ %% TODO: avoid double calls to the same process
+ StartOffset = case Offset of
+ first -> NextOffset;
+ last -> NextOffset;
+ next -> NextOffset;
+ {timestamp, _} -> NextOffset;
+ _ -> Offset
+ end,
+ Str0 = #stream{name = amqqueue:get_name(Q),
+ credit = Max,
+ start_offset = StartOffset,
+ listening_offset = NextOffset,
+ log = Seg0,
+ max = Max},
+ State#stream_client{readers = Readers0#{Tag => Str0}}.
+
+cancel(_Q, ConsumerTag, OkMsg, ActingUser, #stream_client{readers = Readers0,
+ name = QName} = State) ->
+ Readers = maps:remove(ConsumerTag, Readers0),
+ rabbit_core_metrics:consumer_deleted(self(), ConsumerTag, QName),
+ rabbit_event:notify(consumer_deleted, [{consumer_tag, ConsumerTag},
+ {channel, self()},
+ {queue, QName},
+ {user_who_performed_action, ActingUser}]),
+ maybe_send_reply(self(), OkMsg),
+ {ok, State#stream_client{readers = Readers}}.
+
+credit(CTag, Credit, Drain, #stream_client{readers = Readers0,
+ name = Name,
+ leader = Leader} = State) ->
+ {Readers1, Msgs} = case Readers0 of
+ #{CTag := #stream{credit = Credit0} = Str0} ->
+ Str1 = Str0#stream{credit = Credit0 + Credit},
+ {Str, Msgs0} = stream_entries(Name, Leader, Str1),
+ {Readers0#{CTag => Str}, Msgs0};
+ _ ->
+ {Readers0, []}
+ end,
+ {Readers, Actions} =
+ case Drain of
+ true ->
+ case Readers1 of
+ #{CTag := #stream{credit = Credit1} = Str2} ->
+ {Readers0#{CTag => Str2#stream{credit = 0}}, [{send_drained, {CTag, Credit1}}]};
+ _ ->
+ {Readers1, []}
+ end;
+ false ->
+ {Readers1, []}
+ end,
+ {State#stream_client{readers = Readers}, [{send_credit_reply, length(Msgs)},
+ {deliver, CTag, true, Msgs}] ++ Actions}.
+
+deliver(QSs, #delivery{confirm = Confirm} = Delivery) ->
+ lists:foldl(
+ fun({_Q, stateless}, {Qs, Actions}) ->
+ %% TODO what do we do with stateless?
+ %% QRef = amqqueue:get_pid(Q),
+ %% ok = rabbit_fifo_client:untracked_enqueue(
+ %% [QRef], Delivery#delivery.message),
+ {Qs, Actions};
+ ({Q, S0}, {Qs, Actions}) ->
+ S = deliver(Confirm, Delivery, S0),
+ {[{Q, S} | Qs], Actions}
+ end, {[], []}, QSs).
+
+deliver(_Confirm, #delivery{message = Msg, msg_seq_no = MsgId},
+ #stream_client{name = Name,
+ leader = LeaderPid,
+ next_seq = Seq,
+ correlation = Correlation0,
+ soft_limit = SftLmt,
+ slow = Slow0} = State) ->
+ ok = osiris:write(LeaderPid, Seq, msg_to_iodata(Msg)),
+ Correlation = case MsgId of
+ undefined ->
+ Correlation0;
+ _ when is_number(MsgId) ->
+ Correlation0#{Seq => MsgId}
+ end,
+ Slow = case maps:size(Correlation) >= SftLmt of
+ true when not Slow0 ->
+ credit_flow:block(Name),
+ true;
+ Bool ->
+ Bool
+ end,
+ State#stream_client{next_seq = Seq + 1,
+ correlation = Correlation,
+ slow = Slow}.
-spec dequeue(_, _, _, client()) -> no_return().
%% basic.get has no meaning for an append-only stream (no per-message
%% removal); always raise a protocol error.
dequeue(_, _, _, #stream_client{name = Name}) ->
    {protocol_error, not_implemented, "basic.get not supported by stream queues ~s",
     [rabbit_misc:rs(Name)]}.
+
+handle_event({osiris_written, From, Corrs}, State = #stream_client{correlation = Correlation0,
+ soft_limit = SftLmt,
+ slow = Slow0,
+ name = Name}) ->
+ MsgIds = maps:values(maps:with(Corrs, Correlation0)),
+ Correlation = maps:without(Corrs, Correlation0),
+ Slow = case maps:size(Correlation) < SftLmt of
+ true when Slow0 ->
+ credit_flow:unblock(Name),
+ false;
+ _ ->
+ Slow0
+ end,
+ {ok, State#stream_client{correlation = Correlation,
+ slow = Slow}, [{settled, From, MsgIds}]};
+handle_event({osiris_offset, _From, _Offs}, State = #stream_client{leader = Leader,
+ readers = Readers0,
+ name = Name}) ->
+ %% offset isn't actually needed as we use the atomic to read the
+ %% current committed
+ {Readers, TagMsgs} = maps:fold(
+ fun (Tag, Str0, {Acc, TM}) ->
+ {Str, Msgs} = stream_entries(Name, Leader, Str0),
+ %% HACK for now, better to just return but
+ %% tricky with acks credits
+ %% that also evaluate the stream
+ % gen_server:cast(self(), {stream_delivery, Tag, Msgs}),
+ {Acc#{Tag => Str}, [{Tag, Leader, Msgs} | TM]}
+ end, {#{}, []}, Readers0),
+ Ack = true,
+ Deliveries = [{deliver, Tag, Ack, OffsetMsg}
+ || {Tag, _LeaderPid, OffsetMsg} <- TagMsgs],
+ {ok, State#stream_client{readers = Readers}, Deliveries}.
+
+%% A stream is recoverable on this node if the node hosts either the
+%% leader or one of the replicas, per the queue's type state.
+is_recoverable(Q) ->
+    Node = node(),
+    #{replica_nodes := Nodes,
+      leader_node := Leader} = amqqueue:get_type_state(Q),
+    lists:member(Node, Nodes ++ [Leader]).
+
+%% Recover all given stream queues on boot. Returns {Recovered, Failed};
+%% note the failure list is always [] here because recover/1 is matched
+%% with {ok, _} and would crash rather than report a failure.
+recover(_VHost, Queues) ->
+    lists:foldl(
+      fun (Q0, {R0, F0}) ->
+              {ok, Q} = recover(Q0),
+              {[Q | R0], F0}
+      end, {[], []}, Queues).
+
+%% 'complete' (ack) settlement: each settled message returns one unit of
+%% credit to the consumer's reader, after which the stream is read again
+%% and any newly available messages are delivered immediately.
+settle(complete, CTag, MsgIds, #stream_client{readers = Readers0,
+                                              name = Name,
+                                              leader = Leader} = State) ->
+    Credit = length(MsgIds),
+    {Readers, Msgs} = case Readers0 of
+                          #{CTag := #stream{credit = Credit0} = Str0} ->
+                              Str1 = Str0#stream{credit = Credit0 + Credit},
+                              {Str, Msgs0} = stream_entries(Name, Leader, Str1),
+                              {Readers0#{CTag => Str}, Msgs0};
+                          _ ->
+                              %% unknown consumer tag: nothing to do
+                              {Readers0, []}
+                      end,
+    {State#stream_client{readers = Readers}, [{deliver, CTag, true, Msgs}]};
+%% nack/reject are meaningless for an append-only log; report a
+%% protocol error for the channel to raise.
+settle(_, _, _, #stream_client{name = Name}) ->
+    {protocol_error, not_implemented,
+     "basic.nack and basic.reject not supported by stream queues ~s",
+     [rabbit_misc:rs(Name)]}.
+
+%% Collect the requested info items for a stream queue as a proplist,
+%% preserving the order of the Items list. all_items expands to the
+%% module's ?INFO_KEYS.
+info(Q, all_items) ->
+    info(Q, ?INFO_KEYS);
+info(Q, Items) ->
+    lists:foldr(fun(Item, Acc) ->
+                        [{Item, i(Item, Q)} | Acc]
+                end, [], Items).
+
+%% Look up a single info item. Unknown items fall through to ''.
+i(name, Q) when ?is_amqqueue(Q) -> amqqueue:get_name(Q);
+i(durable, Q) when ?is_amqqueue(Q) -> amqqueue:is_durable(Q);
+i(auto_delete, Q) when ?is_amqqueue(Q) -> amqqueue:is_auto_delete(Q);
+i(arguments, Q) when ?is_amqqueue(Q) -> amqqueue:get_arguments(Q);
+i(leader, Q) when ?is_amqqueue(Q) ->
+    #{leader_node := Leader} = amqqueue:get_type_state(Q),
+    Leader;
+i(members, Q) when ?is_amqqueue(Q) ->
+    #{replica_nodes := Nodes} = amqqueue:get_type_state(Q),
+    Nodes;
+%% online = nodes whose member process (replica or leader) is alive
+i(online, Q) ->
+    #{replica_pids := ReplicaPids,
+      leader_pid := LeaderPid} = amqqueue:get_type_state(Q),
+    [node(P) || P <- ReplicaPids ++ [LeaderPid], rabbit_misc:is_process_alive(P)];
+i(state, Q) when ?is_amqqueue(Q) ->
+    %% TODO: the stream coordinator should eventually report the real
+    %% state; hard-coded to 'running' for now.
+    running;
+%% The three message counters come from the queue_coarse_metrics ETS
+%% table; row layout (inferred from the matches below) appears to be
+%% {QName, Ready, Unacked, Total, _} -- confirm against rabbit_core_metrics.
+i(messages, Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    case ets:lookup(queue_coarse_metrics, QName) of
+        [{_, _, _, M, _}] ->
+            M;
+        [] ->
+            0
+    end;
+i(messages_ready, Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    case ets:lookup(queue_coarse_metrics, QName) of
+        [{_, MR, _, _, _}] ->
+            MR;
+        [] ->
+            0
+    end;
+i(messages_unacknowledged, Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    case ets:lookup(queue_coarse_metrics, QName) of
+        [{_, _, MU, _, _}] ->
+            MU;
+        [] ->
+            0
+    end;
+i(committed_offset, Q) ->
+    %% TODO should it be on a metrics table?
+    Data = osiris_counters:overview(),
+    maps:get(committed_offset,
+             maps:get({osiris_writer, amqqueue:get_name(Q)}, Data));
+i(policy, Q) ->
+    case rabbit_policy:name(Q) of
+        none   -> '';
+        Policy -> Policy
+    end;
+i(operator_policy, Q) ->
+    case rabbit_policy:name_op(Q) of
+        none   -> '';
+        Policy -> Policy
+    end;
+i(effective_policy_definition, Q) ->
+    case rabbit_policy:effective_definition(Q) of
+        undefined -> [];
+        Def       -> Def
+    end;
+i(type, _) ->
+    stream;
+i(_, _) ->
+    ''.
+
+%% Build the per-channel client state for a stream queue. The soft
+%% limit bounds the number of unconfirmed publishes before credit flow
+%% blocks the publishing channel.
+init(Q) when ?is_amqqueue(Q) ->
+    Leader = amqqueue:get_pid(Q),
+    {ok, SoftLimit} = application:get_env(rabbit, stream_messages_soft_limit),
+    #stream_client{name = amqqueue:get_name(Q),
+                   leader = Leader,
+                   soft_limit = SoftLimit}.
+
+%% Close every open osiris log handle held by this client's readers.
+close(#stream_client{readers = Readers}) ->
+    _ = maps:map(fun (_, #stream{log = Log}) ->
+                         osiris_log:close(Log)
+                 end, Readers),
+    ok.
+
+%% Queue record updates do not affect the client state.
+update(_, State) ->
+    State.
+
+%% No additional per-queue state to expose.
+state_info(_) ->
+    #{}.
+
+%% Validate the max-age policy value and, if valid, store it in the
+%% queue's type state inside an mnesia transaction. Returns ok,
+%% {error, not_found} or the validation error from check_max_age/1.
+set_retention_policy(Name, VHost, Policy) ->
+    case rabbit_amqqueue:check_max_age(Policy) of
+        {error, _} = E ->
+            E;
+        MaxAge ->
+            QName = rabbit_misc:r(VHost, queue, Name),
+            Fun = fun(Q) ->
+                          Conf = amqqueue:get_type_state(Q),
+                          amqqueue:set_type_state(Q, Conf#{max_age => MaxAge})
+                  end,
+            case rabbit_misc:execute_mnesia_transaction(
+                   fun() -> rabbit_amqqueue:update(QName, Fun) end) of
+                not_found ->
+                    {error, not_found};
+                _ ->
+                    ok
+            end
+    end.
+
+%% Ask the stream coordinator to add a replica of the named stream on
+%% Node. Rejects non-stream queues and nodes that are not currently
+%% running in the cluster.
+%% NOTE(review): add_replica/3 and delete_replica/3 differ only in the
+%% coordinator call; consider factoring the shared validation out.
+add_replica(VHost, Name, Node) ->
+    QName = rabbit_misc:r(VHost, queue, Name),
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} when ?amqqueue_is_classic(Q) ->
+            {error, classic_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_quorum(Q) ->
+            {error, quorum_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_stream(Q) ->
+            case lists:member(Node, rabbit_mnesia:cluster_nodes(running)) of
+                false ->
+                    {error, node_not_running};
+                true ->
+                    #{name := StreamId} = amqqueue:get_type_state(Q),
+                    {ok, Reply, _} = rabbit_stream_coordinator:add_replica(StreamId, Node),
+                    Reply
+            end;
+        E ->
+            E
+    end.
+
+%% Ask the stream coordinator to remove the replica of the named stream
+%% hosted on Node. Same validation rules as add_replica/3.
+delete_replica(VHost, Name, Node) ->
+    QName = rabbit_misc:r(VHost, queue, Name),
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} when ?amqqueue_is_classic(Q) ->
+            {error, classic_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_quorum(Q) ->
+            {error, quorum_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_stream(Q) ->
+            case lists:member(Node, rabbit_mnesia:cluster_nodes(running)) of
+                false ->
+                    {error, node_not_running};
+                true ->
+                    #{name := StreamId} = amqqueue:get_type_state(Q),
+                    {ok, Reply, _} = rabbit_stream_coordinator:delete_replica(StreamId, Node),
+                    Reply
+            end;
+        E ->
+            E
+    end.
+
+%% Build the osiris stream configuration map for a new stream queue,
+%% resolving retention/placement settings from queue arguments and
+%% policies. Node becomes the leader; the remaining members are picked
+%% from the rest of the cluster up to the requested initial size.
+make_stream_conf(Node, Q) ->
+    QName = amqqueue:get_name(Q),
+    Name = queue_name(QName),
+    %% MaxLength = args_policy_lookup(<<"max-length">>, fun min/2, Q),
+    MaxBytes = args_policy_lookup(<<"max-length-bytes">>, fun min/2, Q),
+    MaxAge = max_age(args_policy_lookup(<<"max-age">>, fun max_age/2, Q)),
+    MaxSegmentSize = args_policy_lookup(<<"max-segment-size">>, fun min/2, Q),
+    LeaderLocator = queue_leader_locator(args_policy_lookup(<<"queue-leader-locator">>,
+                                                            fun res_arg/2, Q)),
+    InitialClusterSize = initial_cluster_size(args_policy_lookup(<<"initial-cluster-size">>,
+                                                                 fun res_arg/2, Q)),
+    Replicas0 = rabbit_mnesia:cluster_nodes(all) -- [Node],
+    Replicas = select_stream_nodes(InitialClusterSize - 1, Replicas0),
+    %% osiris events are wrapped so they arrive as queue_event casts
+    Formatter = {?MODULE, format_osiris_event, [QName]},
+    %% drop retention criteria that were not configured
+    Retention = lists:filter(fun({_, R}) ->
+                                     R =/= undefined
+                             end, [{max_bytes, MaxBytes},
+                                   {max_age, MaxAge}]),
+    add_if_defined(max_segment_size, MaxSegmentSize, #{reference => QName,
+                                                       name => Name,
+                                                       retention => Retention,
+                                                       leader_locator_strategy => LeaderLocator,
+                                                       leader_node => Node,
+                                                       replica_nodes => Replicas,
+                                                       event_formatter => Formatter,
+                                                       epoch => 1}).
+
+%% Pick up to Size member nodes, always preferring the local node when
+%% it is a candidate, and filling the remainder at random.
+select_stream_nodes(Size, All) when length(All) =< Size ->
+    All;
+select_stream_nodes(Size, All) ->
+    Node = node(),
+    case lists:member(Node, All) of
+        true ->
+            select_stream_nodes(Size - 1, lists:delete(Node, All), [Node]);
+        false ->
+            select_stream_nodes(Size, All, [])
+    end.
+
+select_stream_nodes(0, _, Selected) ->
+    Selected;
+select_stream_nodes(Size, Rest, Selected) ->
+    S = lists:nth(rand:uniform(length(Rest)), Rest),
+    select_stream_nodes(Size - 1, lists:delete(S, Rest), [S | Selected]).
+
+%% Re-derive the retention-related entries of an existing stream config
+%% from the queue's current arguments/policies; leaves the config
+%% untouched if the queue record can no longer be found.
+update_stream_conf(#{reference := QName} = Conf) ->
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} ->
+            MaxBytes = args_policy_lookup(<<"max-length-bytes">>, fun min/2, Q),
+            MaxAge = max_age(args_policy_lookup(<<"max-age">>, fun max_age/2, Q)),
+            MaxSegmentSize = args_policy_lookup(<<"max-segment-size">>, fun min/2, Q),
+            Retention = lists:filter(fun({_, R}) ->
+                                             R =/= undefined
+                                     end, [{max_bytes, MaxBytes},
+                                           {max_age, MaxAge}]),
+            add_if_defined(max_segment_size, MaxSegmentSize, Conf#{retention => Retention});
+        _ ->
+            Conf
+    end.
+
+%% Insert Key => Value into Map, unless the value is undefined, in
+%% which case the map is returned untouched.
+add_if_defined(_Key, undefined, Map) ->
+    Map;
+add_if_defined(Key, Value, Map) ->
+    Map#{Key => Value}.
+
+%% Wrap a raw osiris event so the owning process receives it as a
+%% {queue_event, QRef, Evt} cast (installed via event_formatter).
+format_osiris_event(Evt, QRef) ->
+    {'$gen_cast', {queue_event, QRef, Evt}}.
+
+%% Normalise a max-age value: undefined passes through, binaries are
+%% validated/converted via check_max_age/1, anything else is assumed
+%% to already be a resolved age.
+max_age(undefined) ->
+    undefined;
+max_age(Bin) when is_binary(Bin) ->
+    rabbit_amqqueue:check_max_age(Bin);
+max_age(Age) ->
+    Age.
+
+%% Resolver used with args_policy_lookup for max-age: when both a
+%% policy and an argument value exist, keep the smaller (stricter) one.
+max_age(Age1, Age2) ->
+    min(rabbit_amqqueue:check_max_age(Age1), rabbit_amqqueue:check_max_age(Age2)).
+
+%% Default leader placement strategy when none is configured.
+queue_leader_locator(undefined) -> <<"client-local">>;
+queue_leader_locator(Val) -> Val.
+
+%% Default initial cluster size is the number of running nodes.
+initial_cluster_size(undefined) ->
+    length(rabbit_mnesia:cluster_nodes(running));
+initial_cluster_size(Val) ->
+    Val.
+
+%% Resolver used with args_policy_lookup: a queue argument, when set,
+%% takes precedence over the policy value.
+res_arg(PolVal, undefined) -> PolVal;
+res_arg(_, ArgVal) -> ArgVal.
+
+%% Derive a unique, filesystem-safe osiris stream name from the queue
+%% resource; the timestamp suffix keeps names unique across
+%% delete/redeclare cycles.
+queue_name(#resource{virtual_host = VHost, name = Name}) ->
+    Timestamp = erlang:integer_to_binary(erlang:system_time()),
+    osiris_util:to_base64uri(erlang:binary_to_list(<<VHost/binary, "_", Name/binary, "_",
+                                                     Timestamp/binary>>)).
+
+%% Per-queue recovery: the coordinator handles actual member restart;
+%% this simply (re)triggers it and returns the queue unchanged.
+recover(Q) ->
+    rabbit_stream_coordinator:recover(),
+    {ok, Q}.
+
+%% Verify that the stream has a member (leader or replica) on the
+%% local node: operations such as consuming must attach to a node
+%% that hosts part of the stream. Returns ok, or a protocol_error
+%% tuple for the caller to raise as a precondition_failed error.
+check_queue_exists_in_local_node(Q) ->
+    Conf = amqqueue:get_type_state(Q),
+    AllNodes = [maps:get(leader_node, Conf) | maps:get(replica_nodes, Conf)],
+    case lists:member(node(), AllNodes) of
+        true ->
+            ok;
+        false ->
+            %% fixed typo in the user-facing message ("does not a have a")
+            {protocol_error, precondition_failed,
+             "queue '~s' does not have a replica on the local node",
+             [rabbit_misc:rs(amqqueue:get_name(Q))]}
+    end.
+
+%% Send a command back to the channel unless no reply was requested.
+maybe_send_reply(_ChPid, undefined) -> ok;
+maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg).
+
+%% Entry point for reading from a consumer's stream reader; see
+%% stream_entries/4 for the accumulator-carrying loop.
+stream_entries(Name, Id, Str) ->
+    stream_entries(Name, Id, Str, []).
+
+%% Read chunks from the osiris log while the consumer has credit,
+%% converting each record into a delivery tuple. On end_of_stream,
+%% register an offset listener with the leader (once per new offset)
+%% so an osiris_offset event wakes us up when more data is committed.
+stream_entries(Name, LeaderPid,
+               #stream{name = QName,
+                       credit = Credit,
+                       start_offset = StartOffs,
+                       listening_offset = LOffs,
+                       log = Seg0} = Str0, MsgIn)
+  when Credit > 0 ->
+    case osiris_log:read_chunk_parsed(Seg0) of
+        {end_of_stream, Seg} ->
+            NextOffset = osiris_log:next_offset(Seg),
+            case NextOffset > LOffs of
+                true ->
+                    osiris:register_offset_listener(LeaderPid, NextOffset),
+                    {Str0#stream{log = Seg,
+                                 listening_offset = NextOffset}, MsgIn};
+                false ->
+                    {Str0#stream{log = Seg}, MsgIn}
+            end;
+        {Records, Seg} ->
+            %% records before the consumer's start offset are skipped
+            Msgs = [begin
+                        Msg0 = binary_to_msg(QName, B),
+                        Msg = rabbit_basic:add_header(<<"x-stream-offset">>,
+                                                      long, O, Msg0),
+                        {Name, LeaderPid, O, false, Msg}
+                    end || {O, B} <- Records,
+                           O >= StartOffs],
+
+            NumMsgs = length(Msgs),
+
+            Str = Str0#stream{credit = Credit - NumMsgs,
+                              log = Seg},
+            case Str#stream.credit < 1 of
+                true ->
+                    %% we are done here
+                    {Str, MsgIn ++ Msgs};
+                false ->
+                    %% if fewer Msgs were produced than Records it means there
+                    %% were non-events in the log and we should recurse and
+                    %% try again
+                    stream_entries(Name, LeaderPid, Str, MsgIn ++ Msgs)
+            end
+    end;
+%% no credit left: stop reading and return what we have
+stream_entries(_Name, _Id, Str, Msgs) ->
+    {Str, Msgs}.
+
+%% Decode a raw osiris record back into a #basic_message{}. Exchange
+%% and routing key are recovered from message annotations when present;
+%% otherwise defaults simulate publishing via the default exchange
+%% (empty exchange name, queue name as routing key).
+binary_to_msg(#resource{virtual_host = VHost,
+                        kind = queue,
+                        name = QName}, Data) ->
+    R0 = rabbit_msg_record:init(Data),
+    %% if the message annotation isn't present the data most likely came from
+    %% the rabbitmq-stream plugin so we'll choose defaults that simulate use
+    %% of the direct exchange
+    {utf8, Exchange} = rabbit_msg_record:message_annotation(<<"x-exchange">>,
+                                                            R0, {utf8, <<>>}),
+    {utf8, RoutingKey} = rabbit_msg_record:message_annotation(<<"x-routing-key">>,
+                                                              R0, {utf8, QName}),
+    {Props, Payload} = rabbit_msg_record:to_amqp091(R0),
+    XName = #resource{kind = exchange,
+                      virtual_host = VHost,
+                      name = Exchange},
+    %% class_id 60 is the AMQP 0-9-1 'basic' class
+    Content = #content{class_id = 60,
+                       properties = Props,
+                       properties_bin = none,
+                       payload_fragments_rev = [Payload]},
+    {ok, Msg} = rabbit_basic:message(XName, RoutingKey, Content),
+    Msg.
+
+
+%% Encode a #basic_message{} as the iodata written to the osiris log,
+%% preserving exchange and (first) routing key as message annotations
+%% so binary_to_msg/2 can reconstruct them.
+msg_to_iodata(#basic_message{exchange_name = #resource{name = Exchange},
+                             routing_keys = [RKey | _],
+                             content = Content}) ->
+    #content{properties = Props,
+             payload_fragments_rev = Payload} =
+        rabbit_binary_parser:ensure_content_decoded(Content),
+    R0 = rabbit_msg_record:from_amqp091(Props, lists:reverse(Payload)),
+    %% TODO durable?
+    R = rabbit_msg_record:add_message_annotations(
+          #{<<"x-exchange">> => {utf8, Exchange},
+            <<"x-routing-key">> => {utf8, RKey}}, R0),
+    rabbit_msg_record:to_iodata(R).
+
+%% Advertise which policies, queue arguments and consumer arguments
+%% the stream queue type understands.
+capabilities() ->
+    #{policies => [<<"max-length-bytes">>, <<"max-age">>, <<"max-segment-size">>,
+                   <<"queue-leader-locator">>, <<"initial-cluster-size">>],
+      queue_arguments => [<<"x-dead-letter-exchange">>, <<"x-dead-letter-routing-key">>,
+                          <<"x-max-length">>, <<"x-max-length-bytes">>,
+                          <<"x-single-active-consumer">>, <<"x-queue-type">>,
+                          <<"x-max-age">>, <<"x-max-segment-size">>,
+                          <<"x-initial-cluster-size">>, <<"x-queue-leader-locator">>],
+      consumer_arguments => [<<"x-stream-offset">>],
+      server_named => false}.
diff --git a/deps/rabbit/src/rabbit_sup.erl b/deps/rabbit/src/rabbit_sup.erl
new file mode 100644
index 0000000000..06643b155d
--- /dev/null
+++ b/deps/rabbit/src/rabbit_sup.erl
@@ -0,0 +1,109 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_sup).
+
+-behaviour(supervisor).
+
+-export([start_link/0, start_child/1, start_child/2, start_child/3, start_child/4,
+ start_supervisor_child/1, start_supervisor_child/2,
+ start_supervisor_child/3,
+ start_restartable_child/1, start_restartable_child/2,
+ start_delayed_restartable_child/1, start_delayed_restartable_child/2,
+ stop_child/1]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+-define(SERVER, ?MODULE).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+%% Start the top-level rabbit supervisor, registered locally.
+start_link() -> supervisor:start_link({local, ?SERVER}, ?MODULE, []).
+
+-spec start_child(atom()) -> 'ok'.
+
+%% Start a transient worker child whose id and module are Mod, calling
+%% Mod:start_link/0.
+%% NOTE(review): the specs on these helpers say 'ok' but child_reply/1
+%% propagates supervisor error tuples unchanged -- confirm intent.
+start_child(Mod) -> start_child(Mod, []).
+
+-spec start_child(atom(), [any()]) -> 'ok'.
+
+start_child(Mod, Args) -> start_child(Mod, Mod, Args).
+
+-spec start_child(atom(), atom(), [any()]) -> 'ok'.
+
+start_child(ChildId, Mod, Args) ->
+    child_reply(supervisor:start_child(
+                  ?SERVER,
+                  {ChildId, {Mod, start_link, Args},
+                   transient, ?WORKER_WAIT, worker, [Mod]})).
+
+-spec start_child(atom(), atom(), atom(), [any()]) -> 'ok'.
+
+%% Same as start_child/3 but with an explicit entry function Fun.
+start_child(ChildId, Mod, Fun, Args) ->
+    child_reply(supervisor:start_child(
+                  ?SERVER,
+                  {ChildId, {Mod, Fun, Args},
+                   transient, ?WORKER_WAIT, worker, [Mod]})).
+
+-spec start_supervisor_child(atom()) -> 'ok'.
+
+%% Start a transient supervisor child (shutdown = infinity, as OTP
+%% recommends for supervisor children).
+start_supervisor_child(Mod) -> start_supervisor_child(Mod, []).
+
+-spec start_supervisor_child(atom(), [any()]) -> 'ok'.
+
+start_supervisor_child(Mod, Args) -> start_supervisor_child(Mod, Mod, Args).
+
+-spec start_supervisor_child(atom(), atom(), [any()]) -> 'ok'.
+
+start_supervisor_child(ChildId, Mod, Args) ->
+    child_reply(supervisor:start_child(
+                  ?SERVER,
+                  {ChildId, {Mod, start_link, Args},
+                   transient, infinity, supervisor, [Mod]})).
+
+-spec start_restartable_child(atom()) -> 'ok'.
+
+%% Wrap M under a dedicated rabbit_restartable_sup named "<M>_sup" so
+%% the child can be restarted without taking down this supervisor.
+%% The 'delayed' variants make the restart wrapper delay restarts.
+start_restartable_child(M) -> start_restartable_child(M, [], false).
+
+-spec start_restartable_child(atom(), [any()]) -> 'ok'.
+
+start_restartable_child(M, A) -> start_restartable_child(M, A, false).
+
+-spec start_delayed_restartable_child(atom()) -> 'ok'.
+
+start_delayed_restartable_child(M) -> start_restartable_child(M, [], true).
+
+-spec start_delayed_restartable_child(atom(), [any()]) -> 'ok'.
+
+start_delayed_restartable_child(M, A) -> start_restartable_child(M, A, true).
+
+start_restartable_child(Mod, Args, Delay) ->
+    %% the wrapper supervisor is named after the wrapped module
+    Name = list_to_atom(atom_to_list(Mod) ++ "_sup"),
+    child_reply(supervisor:start_child(
+                  ?SERVER,
+                  {Name, {rabbit_restartable_sup, start_link,
+                          [Name, {Mod, start_link, Args}, Delay]},
+                   transient, infinity, supervisor, [rabbit_restartable_sup]})).
+
+-spec stop_child(atom()) -> rabbit_types:ok_or_error(any()).
+
+%% Terminate and remove a child; removal only happens if termination
+%% succeeded, otherwise the terminate error is returned as-is.
+stop_child(ChildId) ->
+    case supervisor:terminate_child(?SERVER, ChildId) of
+        ok -> supervisor:delete_child(?SERVER, ChildId);
+        E  -> E
+    end.
+
+%% Children are added dynamically; start with none. one_for_all with
+%% zero allowed restarts means any child crash brings the node down.
+init([]) -> {ok, {{one_for_all, 0, 1}, []}}.
+
+
+%%----------------------------------------------------------------------------
+
+%% Normalise supervisor:start_child/2 results: success becomes ok,
+%% anything else is passed through to the caller.
+child_reply({ok, _}) -> ok;
+child_reply(X)       -> X.
diff --git a/deps/rabbit/src/rabbit_sysmon_handler.erl b/deps/rabbit/src/rabbit_sysmon_handler.erl
new file mode 100644
index 0000000000..8f7298ed6e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_sysmon_handler.erl
@@ -0,0 +1,235 @@
+%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% https://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+
+%% @doc A custom event handler to the `sysmon_handler' application's
+%% `system_monitor' event manager.
+%%
+%% This module attempts to discover more information about a process
+%% that generates a system_monitor event.
+
+-module(rabbit_sysmon_handler).
+
+-behaviour(gen_event).
+
+%% API
+-export([add_handler/0]).
+
+%% gen_event callbacks
+-export([init/1, handle_event/2, handle_call/2,
+ handle_info/2, terminate/2, code_change/3]).
+
+-record(state, {timer_ref :: reference() | undefined}).
+
+-define(INACTIVITY_TIMEOUT, 5000).
+
+%%%===================================================================
+%%% gen_event callbacks
+%%%===================================================================
+
+%% Install this module as a handler on the sysmon_handler event
+%% manager, unless it is already installed.
+add_handler() ->
+    %% Vulnerable to race conditions (installing handler multiple
+    %% times), but risk is zero in the common OTP app startup case.
+    case lists:member(?MODULE, gen_event:which_handlers(sysmon_handler)) of
+        true ->
+            ok;
+        false ->
+            sysmon_handler_filter:add_custom_handler(?MODULE, [])
+    end.
+
+%%%===================================================================
+%%% gen_event callbacks
+%%%===================================================================
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Whenever a new event handler is added to an event manager,
+%% this function is called to initialize the event handler.
+%%
+%% @spec init(Args) -> {ok, State}
+%% @end
+%%--------------------------------------------------------------------
+%% Start with no inactivity timer and hibernate until the first event.
+init([]) ->
+    {ok, #state{}, hibernate}.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Whenever an event manager receives an event sent using
+%% gen_event:notify/2 or gen_event:sync_notify/2, this function is
+%% called for each installed event handler to handle the event.
+%%
+%% @spec handle_event(Event, State) ->
+%% {ok, State} |
+%% {swap_handler, Args1, State1, Mod2, Args2} |
+%% remove_handler
+%% @end
+%%--------------------------------------------------------------------
+%% Monitor event about ourselves: don't log (that could loop), just
+%% GC on large_heap warnings to avoid a death spiral.
+handle_event({monitor, Pid, Type, _Info},
+             State=#state{timer_ref=TimerRef}) when Pid == self() ->
+    %% Reset the inactivity timeout
+    NewTimerRef = reset_timer(TimerRef),
+    maybe_collect_garbage(Type),
+    {ok, State#state{timer_ref=NewTimerRef}};
+%% Monitor event about another process or port: log a warning enriched
+%% with whatever identifying info we can gather about the subject.
+handle_event({monitor, PidOrPort, Type, Info}, State=#state{timer_ref=TimerRef}) ->
+    %% Reset the inactivity timeout
+    NewTimerRef = reset_timer(TimerRef),
+    {Fmt, Args} = format_pretty_proc_or_port_info(PidOrPort),
+    rabbit_log:warning("~p ~w ~w " ++ Fmt ++ " ~w", [?MODULE, Type, PidOrPort] ++ Args ++ [Info]),
+    {ok, State#state{timer_ref=NewTimerRef}};
+handle_event({suppressed, Type, Info}, State=#state{timer_ref=TimerRef}) ->
+    %% Reset the inactivity timeout
+    NewTimerRef = reset_timer(TimerRef),
+    rabbit_log:debug("~p encountered a suppressed event of type ~w: ~w", [?MODULE, Type, Info]),
+    {ok, State#state{timer_ref=NewTimerRef}};
+%% Anything unrecognised is logged but otherwise ignored.
+handle_event(Event, State=#state{timer_ref=TimerRef}) ->
+    NewTimerRef = reset_timer(TimerRef),
+    rabbit_log:warning("~p unhandled event: ~p", [?MODULE, Event]),
+    {ok, State#state{timer_ref=NewTimerRef}}.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Whenever an event manager receives a request sent using
+%% gen_event:call/3,4, this function is called for the specified
+%% event handler to handle the request.
+%%
+%% @spec handle_call(Request, State) ->
+%% {ok, Reply, State} |
+%% {swap_handler, Reply, Args1, State1, Mod2, Args2} |
+%% {remove_handler, Reply}
+%% @end
+%%--------------------------------------------------------------------
+%% No synchronous API is offered by this handler.
+handle_call(_Call, State) ->
+    Reply = not_supported,
+    {ok, Reply, State}.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% This function is called for each installed event handler when
+%% an event manager receives any other message than an event or a
+%% synchronous request (or a system message).
+%%
+%% @spec handle_info(Info, State) ->
+%% {ok, State} |
+%% {swap_handler, Args1, State1, Mod2, Args2} |
+%% remove_handler
+%% @end
+%%--------------------------------------------------------------------
+handle_info(inactivity_timeout, State) ->
+    %% No events have arrived for the timeout period
+    %% so hibernate to free up resources.
+    {ok, State, hibernate};
+%% Unexpected messages are logged and dropped.
+handle_info(Info, State) ->
+    rabbit_log:info("handle_info got ~p", [Info]),
+    {ok, State}.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Whenever an event handler is deleted from an event manager, this
+%% function is called. It should be the opposite of Module:init/1 and
+%% do any necessary cleaning up.
+%%
+%% @spec terminate(Reason, State) -> void()
+%% @end
+%%--------------------------------------------------------------------
+%% Nothing to clean up; a pending timer ref dies with the process.
+terminate(_Reason, _State) ->
+    ok.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Convert process state when code is changed
+%%
+%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
+%% @end
+%%--------------------------------------------------------------------
+%% State record layout is unchanged across versions.
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%%===================================================================
+%%% Internal functions
+%%%===================================================================
+
+%% Produce an {io-format-string, args} pair describing PidOrPort.
+%% Gathering the info can itself fail (e.g. the subject died); in that
+%% case the caught exception is reported instead of crashing the
+%% handler.
+format_pretty_proc_or_port_info(PidOrPort) ->
+    try
+        case get_pretty_proc_or_port_info(PidOrPort) of
+            undefined ->
+                {"", []};
+            Res ->
+                Res
+        end
+    catch C:E:S ->
+            {"Pid ~w, ~W ~W at ~w\n",
+             [PidOrPort, C, 20, E, 20, S]}
+    end.
+
+%% Collect identifying details for a pid (registered name, initial
+%% call, current function, message queue length) or a port (name,
+%% queue size, connected process). Returns undefined when the process
+%% is already dead or reports nothing.
+get_pretty_proc_or_port_info(Pid) when is_pid(Pid) ->
+    Infos = [registered_name, initial_call, current_function, message_queue_len],
+    case process_info(Pid, Infos) of
+        undefined ->
+            undefined;
+        [] ->
+            undefined;
+        [{registered_name, RN0}, ICT1, {_, CF}, {_, MQL}] ->
+            %% prefer proc_lib's translated initial call when available
+            ICT = case proc_lib:translate_initial_call(Pid) of
+                      {proc_lib, init_p, 5} -> % not by proc_lib, see docs
+                          ICT1;
+                      ICT2 ->
+                          {initial_call, ICT2}
+                  end,
+            RNL = if RN0 == [] -> [];
+                     true      -> [{name, RN0}]
+                  end,
+            {"~w", [RNL ++ [ICT, CF, {message_queue_len, MQL}]]}
+    end;
+get_pretty_proc_or_port_info(Port) when is_port(Port) ->
+    PortInfo = erlang:port_info(Port),
+    {value, {name, Name}, PortInfo2} = lists:keytake(name, 1, PortInfo),
+    QueueSize = [erlang:port_info(Port, queue_size)],
+    Connected = case proplists:get_value(connected, PortInfo2) of
+                    undefined ->
+                        [];
+                    ConnectedPid ->
+                        case proc_lib:translate_initial_call(ConnectedPid) of
+                            {proc_lib, init_p, 5} -> % not by proc_lib, see docs
+                                [];
+                            ICT ->
+                                [{initial_call, ICT}]
+                        end
+                end,
+    {"name ~s ~w", [Name, lists:append([PortInfo2, QueueSize, Connected])]}.
+
+
+%% @doc If the message type is due to a large heap warning
+%% and the source is ourself, go ahead and collect garbage
+%% to avoid the death spiral.
+-spec maybe_collect_garbage(atom()) -> ok.
+%% GC only on large_heap warnings about ourselves; all other event
+%% types need no reaction.
+maybe_collect_garbage(large_heap) ->
+    erlang:garbage_collect(),
+    ok;
+maybe_collect_garbage(_) ->
+    ok.
+
+-spec reset_timer(undefined | reference()) -> reference().
+%% (Re)start the inactivity timer. NOTE(review): cancel_timer/1 does
+%% not flush a timeout message that was already delivered, so a stale
+%% inactivity_timeout may still arrive; handle_info treats it as a
+%% harmless extra hibernate.
+reset_timer(undefined) ->
+    erlang:send_after(?INACTIVITY_TIMEOUT, self(), inactivity_timeout);
+reset_timer(TimerRef) ->
+    _ = erlang:cancel_timer(TimerRef),
+    reset_timer(undefined).
diff --git a/deps/rabbit/src/rabbit_sysmon_minder.erl b/deps/rabbit/src/rabbit_sysmon_minder.erl
new file mode 100644
index 0000000000..a0402e5ebe
--- /dev/null
+++ b/deps/rabbit/src/rabbit_sysmon_minder.erl
@@ -0,0 +1,156 @@
+%% -------------------------------------------------------------------
+%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% https://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(rabbit_sysmon_minder).
+
+-behaviour(gen_server).
+
+%% API
+-export([start_link/0]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-record(state, {}).
+
+%%%===================================================================
+%%% API
+%%%===================================================================
+
+%%--------------------------------------------------------------------
+%% @doc
+%% Starts the server
+%%
+%% @spec start_link() -> {ok, Pid} | ignore | {error, Error}
+%% @end
+%%--------------------------------------------------------------------
+%% Start the singleton minder process, registered under the module name.
+start_link() ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+%%%===================================================================
+%%% gen_server callbacks
+%%%===================================================================
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Initializes the server
+%%
+%% @spec init(Args) -> {ok, State} |
+%% {ok, State, Timeout} |
+%% ignore |
+%% {stop, Reason}
+%% @end
+%%--------------------------------------------------------------------
+init([]) ->
+    %% Add our system_monitor event handler.  We do that here because
+    %% we have a process at our disposal (i.e. ourself) to receive the
+    %% notification in the very unlikely event that the
+    %% sysmon_handler has crashed and been removed from the
+    %% sysmon_handler gen_event server.  (If we had a supervisor
+    %% or app-starting process add the handler, then if the handler
+    %% crashes, nobody will act on the crash notification.)
+    rabbit_sysmon_handler:add_handler(),
+    {ok, #state{}}.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Handling call messages
+%%
+%% @spec handle_call(Request, From, State) ->
+%% {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} |
+%% {stop, Reason, State}
+%% @end
+%%--------------------------------------------------------------------
+%% No synchronous API; reply ok to any call.
+handle_call(_Request, _From, State) ->
+    Reply = ok,
+    {reply, Reply, State}.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Handling cast messages
+%%
+%% @spec handle_cast(Msg, State) -> {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State}
+%% @end
+%%--------------------------------------------------------------------
+%% No asynchronous API; ignore any cast.
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Handling all non call/cast messages
+%%
+%% @spec handle_info(Info, State) -> {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State}
+%% @end
+%%--------------------------------------------------------------------
+%% The event handler we installed crashed: wait briefly, then
+%% reinstall it. This process exists precisely to perform this
+%% re-registration (gen_event handlers are not supervised).
+handle_info({gen_event_EXIT, rabbit_sysmon_handler, _}, State) ->
+    %% SASL will create an error message, no need for us to duplicate it.
+    %%
+    %% Our handler should never crash, but it did indeed crash.  If
+    %% there's a pathological condition somewhere that's generating
+    %% lots of unforseen things that crash core's custom handler, we
+    %% could make things worse by jumping back into the exploding
+    %% volcano.  Wait a little bit before jumping back.  Besides, the
+    %% system_monitor data is nice but is not critical: there is no
+    %% need to make things worse if things are indeed bad, and if we
+    %% miss a few seconds of system_monitor events, the world will not
+    %% end.
+    timer:sleep(2*1000),
+    rabbit_sysmon_handler:add_handler(),
+    {noreply, State};
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% This function is called by a gen_server when it is about to
+%% terminate. It should be the opposite of Module:init/1 and do any
+%% necessary cleaning up. When it returns, the gen_server terminates
+%% with Reason. The return value is ignored.
+%%
+%% @spec terminate(Reason, State) -> void()
+%% @end
+%%--------------------------------------------------------------------
+%% Nothing to clean up.
+terminate(_Reason, _State) ->
+    ok.
+
+%%--------------------------------------------------------------------
+%% @private
+%% @doc
+%% Convert process state when code is changed
+%%
+%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
+%% @end
+%%--------------------------------------------------------------------
+%% State record layout is unchanged across versions.
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
diff --git a/deps/rabbit/src/rabbit_table.erl b/deps/rabbit/src/rabbit_table.erl
new file mode 100644
index 0000000000..77534763d0
--- /dev/null
+++ b/deps/rabbit/src/rabbit_table.erl
@@ -0,0 +1,416 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_table).
+
+-export([
+ create/0, create/2, ensure_local_copies/1, ensure_table_copy/2,
+ wait_for_replicated/1, wait/1, wait/2,
+ force_load/0, is_present/0, is_empty/0, needs_default_data/0,
+ check_schema_integrity/1, clear_ram_only_tables/0, retry_timeout/0,
+ wait_for_replicated/0, exists/1]).
+
+%% for testing purposes
+-export([definitions/0]).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-type retry() :: boolean().
+-type mnesia_table() :: atom().
+
+%%----------------------------------------------------------------------------
+%% Main interface
+%%----------------------------------------------------------------------------
+
+-spec create() -> 'ok'.
+
+%% Creates every table from definitions/0 and then the secondary
+%% indexes. Throws (via create/2) if any table cannot be created.
+create() ->
+    lists:foreach(
+      fun ({Table, Def}) -> create(Table, Def) end,
+      definitions()),
+    ensure_secondary_indexes(),
+    ok.
+
+-spec create(mnesia_table(), list()) -> rabbit_types:ok_or_error(any()).
+
+%% Creates a single Mnesia table. The 'match' entry is only used by
+%% schema integrity checks, so it is stripped before handing the
+%% definition to Mnesia. Already-existing tables are treated as success;
+%% any other failure is thrown, aborting the boot sequence.
+create(TableName, TableDefinition) ->
+    TableDefinition1 = proplists:delete(match, TableDefinition),
+    rabbit_log:debug("Will create a schema database table '~s'", [TableName]),
+    case mnesia:create_table(TableName, TableDefinition1) of
+        {atomic, ok} -> ok;
+        {aborted,{already_exists, TableName}} -> ok;
+        {aborted, {already_exists, TableName, _}} -> ok;
+        {aborted, Reason} ->
+            throw({error, {table_creation_failed, TableName, TableDefinition1, Reason}})
+    end.
+
+-spec exists(mnesia_table()) -> boolean().
+%% True iff Mnesia currently knows about the given table on this node.
+exists(Table) ->
+    lists:member(Table, mnesia:system_info(tables)).
+
+%% Sets up secondary indexes in a blank node database.
+ensure_secondary_indexes() ->
+    ensure_secondary_index(rabbit_queue, vhost),
+    ok.
+
+%% Adds one secondary index, tolerating it already being present.
+%% NOTE(review): any other {aborted, _} reason will crash with a
+%% case_clause here — intentional fail-fast during boot, presumably.
+ensure_secondary_index(Table, Field) ->
+    case mnesia:add_table_index(Table, Field) of
+        {atomic, ok} -> ok;
+        {aborted, {already_exists, Table, _}} -> ok
+    end.
+
+-spec ensure_table_copy(mnesia_table(), node()) -> ok | {error, any()}.
+%% Ensures Node holds a disc copy of TableName. An existing copy counts
+%% as success; unlike create/2 this returns {error, Reason} instead of
+%% throwing, leaving the decision to the caller.
+ensure_table_copy(TableName, Node) ->
+    rabbit_log:debug("Will add a local schema database copy for table '~s'", [TableName]),
+    case mnesia:add_table_copy(TableName, Node, disc_copies) of
+        {atomic, ok} -> ok;
+        {aborted,{already_exists, TableName}} -> ok;
+        {aborted, {already_exists, TableName, _}} -> ok;
+        {aborted, Reason} -> {error, Reason}
+    end.
+
+%% This arity only exists for backwards compatibility with certain
+%% plugins. See https://github.com/rabbitmq/rabbitmq-clusterer/issues/19.
+
+-spec wait_for_replicated() -> 'ok'.
+
+wait_for_replicated() ->
+    wait_for_replicated(false).
+
+-spec wait_for_replicated(retry()) -> 'ok'.
+
+%% Waits only for tables that are replicated across the cluster,
+%% i.e. those NOT marked {local_content, true} in their definition.
+wait_for_replicated(Retry) ->
+    wait([Tab || {Tab, TabDef} <- definitions(),
+                 not lists:member({local_content, true}, TabDef)], Retry).
+
+-spec wait([atom()]) -> 'ok'.
+
+wait(TableNames) ->
+    wait(TableNames, _Retry = false).
+
+%% Retry is a boolean: 'true' enables the configured retry limit,
+%% 'false' means a single attempt (see retry_timeout/1).
+wait(TableNames, Retry) ->
+    {Timeout, Retries} = retry_timeout(Retry),
+    wait(TableNames, Timeout, Retries).
+
+%% Waits up to Timeout ms per attempt; on failure either recurses with
+%% one fewer retry or, when this was the last attempt, throws the error.
+wait(TableNames, Timeout, Retries) ->
+    %% We might be in ctl here for offline ops, in which case we can't
+    %% get_env() for the rabbit app.
+    rabbit_log:info("Waiting for Mnesia tables for ~p ms, ~p retries left~n",
+                    [Timeout, Retries - 1]),
+    Result = case mnesia:wait_for_tables(TableNames, Timeout) of
+                 ok ->
+                     ok;
+                 {timeout, BadTabs} ->
+                     AllNodes = rabbit_mnesia:cluster_nodes(all),
+                     {error, {timeout_waiting_for_tables, AllNodes, BadTabs}};
+                 {error, Reason} ->
+                     AllNodes = rabbit_mnesia:cluster_nodes(all),
+                     {error, {failed_waiting_for_tables, AllNodes, Reason}}
+             end,
+    case {Retries, Result} of
+        {_, ok} ->
+            rabbit_log:info("Successfully synced tables from a peer"),
+            ok;
+        {1, {error, _} = Error} ->
+            throw(Error);
+        {_, {error, Error}} ->
+            rabbit_log:warning("Error while waiting for Mnesia tables: ~p~n", [Error]),
+            wait(TableNames, Timeout, Retries - 1)
+    end.
+
+%% Maps the Retry boolean to {TimeoutPerAttempt, AttemptCount}.
+%% 'false' means exactly one attempt; 'true' reads the retry limit
+%% from app config, defaulting to 10 attempts.
+retry_timeout(_Retry = false) ->
+    {retry_timeout(), 1};
+retry_timeout(_Retry = true) ->
+    Retries = case application:get_env(rabbit, mnesia_table_loading_retry_limit) of
+                  {ok, T} -> T;
+                  undefined -> 10
+              end,
+    {retry_timeout(), Retries}.
+
+-spec retry_timeout() -> non_neg_integer() | infinity.
+
+%% Per-attempt wait, configurable; defaults to 30 seconds.
+retry_timeout() ->
+    case application:get_env(rabbit, mnesia_table_loading_retry_timeout) of
+        {ok, T} -> T;
+        undefined -> 30000
+    end.
+
+-spec force_load() -> 'ok'.
+
+%% Forces Mnesia to load every known table even if other replicas are
+%% unavailable (used by the primary upgrader — see rabbit_upgrade).
+force_load() -> [mnesia:force_load_table(T) || T <- names()], ok.
+
+-spec is_present() -> boolean().
+
+%% True iff every table from definitions/0 is known to Mnesia.
+is_present() -> names() -- mnesia:system_info(tables) =:= [].
+
+-spec is_empty() -> boolean().
+
+is_empty() -> is_empty(names()).
+
+-spec needs_default_data() -> boolean().
+
+%% Default users/vhosts are only seeded when these tables are empty.
+needs_default_data() -> is_empty([rabbit_user, rabbit_user_permission,
+                                  rabbit_vhost]).
+
+%% A table is considered empty when a dirty first-key read finds nothing.
+is_empty(Names) ->
+    lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end,
+              Names).
+
+-spec check_schema_integrity(retry()) -> rabbit_types:ok_or_error(any()).
+
+%% Two-phase integrity check: first verify every expected table exists
+%% and has the expected attributes; only then wait for the tables to
+%% load and validate their contents against each 'match' pattern.
+check_schema_integrity(Retry) ->
+    Tables = mnesia:system_info(tables),
+    case check(fun (Tab, TabDef) ->
+                       case lists:member(Tab, Tables) of
+                           false -> {error, {table_missing, Tab}};
+                           true  -> check_attributes(Tab, TabDef)
+                       end
+               end) of
+        ok     -> wait(names(), Retry),
+                  check(fun check_content/2);
+        Other  -> Other
+    end.
+
+-spec clear_ram_only_tables() -> 'ok'.
+
+%% Clears every table for which this node holds only a RAM copy.
+%% The {atomic, ok} match deliberately crashes if clearing fails.
+clear_ram_only_tables() ->
+    Node = node(),
+    lists:foreach(
+      fun (TabName) ->
+              case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of
+                  true  -> {atomic, ok} = mnesia:clear_table(TabName);
+                  false -> ok
+              end
+      end, names()),
+    ok.
+
+%% The sequence in which we delete the schema and then the other
+%% tables is important: if we delete the schema first when moving to
+%% RAM mnesia will loudly complain since it doesn't make much sense to
+%% do that. But when moving to disc, we need to move the schema first.
+
+-spec ensure_local_copies('disc' | 'ram') -> 'ok'.
+
+%% Converts this node's table copies to the given node type; ordering
+%% of schema vs. other tables differs per direction (see note above).
+ensure_local_copies(disc) ->
+    create_local_copy(schema, disc_copies),
+    create_local_copies(disc);
+ensure_local_copies(ram) ->
+    create_local_copies(ram),
+    create_local_copy(schema, ram_copies).
+
+%%--------------------------------------------------------------------
+%% Internal helpers
+%%--------------------------------------------------------------------
+
+%% Creates/converts a local copy of every table. On a disc node (or for
+%% local_content tables regardless of node type) the storage type is
+%% taken from the table definition; on a ram node everything becomes a
+%% ram copy (definitions(ram) has already stripped disc storage).
+create_local_copies(Type) ->
+    lists:foreach(
+      fun ({Tab, TabDef}) ->
+              HasDiscCopies     = has_copy_type(TabDef, disc_copies),
+              HasDiscOnlyCopies = has_copy_type(TabDef, disc_only_copies),
+              LocalTab          = proplists:get_bool(local_content, TabDef),
+              StorageType =
+                  if
+                      Type =:= disc orelse LocalTab ->
+                          if
+                              HasDiscCopies     -> disc_copies;
+                              HasDiscOnlyCopies -> disc_only_copies;
+                              true              -> ram_copies
+                          end;
+                      Type =:= ram ->
+                          ram_copies
+                  end,
+              ok = create_local_copy(Tab, StorageType)
+      end, definitions(Type)),
+    ok.
+
+%% Adds a copy of Tab on this node if none exists, or converts the
+%% existing copy's storage type if it differs; no-op when already right.
+create_local_copy(Tab, Type) ->
+    StorageType = mnesia:table_info(Tab, storage_type),
+    {atomic, ok} =
+        if
+            StorageType == unknown ->
+                mnesia:add_table_copy(Tab, node(), Type);
+            StorageType /= Type ->
+                mnesia:change_table_copy_type(Tab, node(), Type);
+            true -> {atomic, ok}
+        end,
+    ok.
+
+%% True iff the table definition lists this node under the given
+%% storage-type key (disc_copies / disc_only_copies).
+has_copy_type(TabDef, DiscType) ->
+    lists:member(node(), proplists:get_value(DiscType, TabDef, [])).
+
+%% Compares the live table's attribute list with the expected one from
+%% the definition; mismatch indicates a schema from another version.
+check_attributes(Tab, TabDef) ->
+    {_, ExpAttrs} = proplists:lookup(attributes, TabDef),
+    case mnesia:table_info(Tab, attributes) of
+        ExpAttrs -> ok;
+        Attrs    -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}}
+    end.
+
+%% Spot-checks content: reads the first record (dirty) and verifies it
+%% satisfies the definition's 'match' pattern. Only the first key is
+%% sampled; an empty table trivially passes.
+check_content(Tab, TabDef) ->
+    {_, Match} = proplists:lookup(match, TabDef),
+    case mnesia:dirty_first(Tab) of
+        '$end_of_table' ->
+            ok;
+        Key ->
+            ObjList = mnesia:dirty_read(Tab, Key),
+            MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]),
+            case ets:match_spec_run(ObjList, MatchComp) of
+                ObjList -> ok;
+                _       -> {error, {table_content_invalid, Tab, Match, ObjList}}
+            end
+    end.
+
+%% Runs Fun over every table definition, collecting all errors; returns
+%% ok only when every table passes.
+check(Fun) ->
+    case [Error || {Tab, TabDef} <- definitions(),
+                   begin
+                       {Ret, Error} = case Fun(Tab, TabDef) of
+                                          ok         -> {false, none};
+                                          {error, E} -> {true, E}
+                                      end,
+                       Ret
+                   end] of
+        []     -> ok;
+        Errors -> {error, Errors}
+    end.
+
+%%--------------------------------------------------------------------
+%% Table definitions
+%%--------------------------------------------------------------------
+
+%% Names of all tables this module manages.
+names() -> [Tab || {Tab, _} <- definitions()].
+
+%% The tables aren't supposed to be on disk on a ram node
+definitions(disc) ->
+    definitions();
+%% For a ram node: strip any disc storage from each definition and
+%% force a ram copy on this node instead.
+definitions(ram) ->
+    [{Tab, [{disc_copies, []}, {ram_copies, [node()]} |
+            proplists:delete(
+              ram_copies, proplists:delete(disc_copies, TabDef))]} ||
+        {Tab, TabDef} <- definitions()].
+
+%% Master list of {TableName, TableDefinition} pairs. Each definition
+%% carries standard mnesia:create_table/2 options plus a non-Mnesia
+%% 'match' pattern used only by check_schema_integrity/1 (stripped in
+%% create/2). GM and mirrored_supervisor contribute their own tables.
+definitions() ->
+    [{rabbit_user,
+      [{record_name, internal_user},
+       {attributes, internal_user:fields()},
+       {disc_copies, [node()]},
+       {match, internal_user:pattern_match_all()}]},
+     {rabbit_user_permission,
+      [{record_name, user_permission},
+       {attributes, record_info(fields, user_permission)},
+       {disc_copies, [node()]},
+       {match, #user_permission{user_vhost = #user_vhost{_='_'},
+                                permission = #permission{_='_'},
+                                _='_'}}]},
+     {rabbit_topic_permission,
+      [{record_name, topic_permission},
+       {attributes, record_info(fields, topic_permission)},
+       {disc_copies, [node()]},
+       {match, #topic_permission{topic_permission_key = #topic_permission_key{_='_'},
+                                 permission = #permission{_='_'},
+                                 _='_'}}]},
+     {rabbit_vhost,
+      [
+       {record_name, vhost},
+       {attributes, vhost:fields()},
+       {disc_copies, [node()]},
+       {match, vhost:pattern_match_all()}]},
+     {rabbit_listener,
+      [{record_name, listener},
+       {attributes, record_info(fields, listener)},
+       {type, bag},
+       {match, #listener{_='_'}}]},
+     {rabbit_durable_route,
+      [{record_name, route},
+       {attributes, record_info(fields, route)},
+       {disc_copies, [node()]},
+       {match, #route{binding = binding_match(), _='_'}}]},
+     {rabbit_semi_durable_route,
+      [{record_name, route},
+       {attributes, record_info(fields, route)},
+       {type, ordered_set},
+       {match, #route{binding = binding_match(), _='_'}}]},
+     {rabbit_route,
+      [{record_name, route},
+       {attributes, record_info(fields, route)},
+       {type, ordered_set},
+       {match, #route{binding = binding_match(), _='_'}}]},
+     {rabbit_reverse_route,
+      [{record_name, reverse_route},
+       {attributes, record_info(fields, reverse_route)},
+       {type, ordered_set},
+       {match, #reverse_route{reverse_binding = reverse_binding_match(),
+                              _='_'}}]},
+     {rabbit_topic_trie_node,
+      [{record_name, topic_trie_node},
+       {attributes, record_info(fields, topic_trie_node)},
+       {type, ordered_set},
+       {match, #topic_trie_node{trie_node = trie_node_match(), _='_'}}]},
+     {rabbit_topic_trie_edge,
+      [{record_name, topic_trie_edge},
+       {attributes, record_info(fields, topic_trie_edge)},
+       {type, ordered_set},
+       {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]},
+     {rabbit_topic_trie_binding,
+      [{record_name, topic_trie_binding},
+       {attributes, record_info(fields, topic_trie_binding)},
+       {type, ordered_set},
+       {match, #topic_trie_binding{trie_binding = trie_binding_match(),
+                                   _='_'}}]},
+     {rabbit_durable_exchange,
+      [{record_name, exchange},
+       {attributes, record_info(fields, exchange)},
+       {disc_copies, [node()]},
+       {match, #exchange{name = exchange_name_match(), _='_'}}]},
+     {rabbit_exchange,
+      [{record_name, exchange},
+       {attributes, record_info(fields, exchange)},
+       {match, #exchange{name = exchange_name_match(), _='_'}}]},
+     {rabbit_exchange_serial,
+      [{record_name, exchange_serial},
+       {attributes, record_info(fields, exchange_serial)},
+       {match, #exchange_serial{name = exchange_name_match(), _='_'}}]},
+     {rabbit_runtime_parameters,
+      [{record_name, runtime_parameters},
+       {attributes, record_info(fields, runtime_parameters)},
+       {disc_copies, [node()]},
+       {match, #runtime_parameters{_='_'}}]},
+     {rabbit_durable_queue,
+      [{record_name, amqqueue},
+       {attributes, amqqueue:fields()},
+       {disc_copies, [node()]},
+       {match, amqqueue:pattern_match_on_name(queue_name_match())}]},
+     {rabbit_queue,
+      [{record_name, amqqueue},
+       {attributes, amqqueue:fields()},
+       {match, amqqueue:pattern_match_on_name(queue_name_match())}]}
+    ]
+    ++ gm:table_definitions()
+    ++ mirrored_supervisor:table_definitions().
+
+%% Match-pattern builders used by the table definitions above. All
+%% unspecified record fields are wildcarded with _='_'.
+binding_match() ->
+    #binding{source      = exchange_name_match(),
+             destination = binding_destination_match(),
+             _='_'}.
+reverse_binding_match() ->
+    #reverse_binding{destination = binding_destination_match(),
+                     source      = exchange_name_match(),
+                     _='_'}.
+%% A binding destination can be any resource kind (queue or exchange).
+binding_destination_match() ->
+    resource_match('_').
+trie_node_match() ->
+    #trie_node{exchange_name = exchange_name_match(), _='_'}.
+trie_edge_match() ->
+    #trie_edge{exchange_name = exchange_name_match(), _='_'}.
+trie_binding_match() ->
+    #trie_binding{exchange_name = exchange_name_match(), _='_'}.
+exchange_name_match() ->
+    resource_match(exchange).
+queue_name_match() ->
+    resource_match(queue).
+%% #resource{} wildcard constrained to a given kind.
+resource_match(Kind) ->
+    #resource{kind = Kind, _='_'}.
diff --git a/deps/rabbit/src/rabbit_trace.erl b/deps/rabbit/src/rabbit_trace.erl
new file mode 100644
index 0000000000..74b892330e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_trace.erl
@@ -0,0 +1,128 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_trace).
+
+-export([init/1, enabled/1, tap_in/6, tap_out/5, start/1, stop/1]).
+
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+
+-define(TRACE_VHOSTS, trace_vhosts).
+-define(XNAME, <<"amq.rabbitmq.trace">>).
+
+%%----------------------------------------------------------------------------
+
+-type state() :: rabbit_types:exchange() | 'none'.
+
+%%----------------------------------------------------------------------------
+
+-spec init(rabbit_types:vhost()) -> state().
+
+%% Returns the trace exchange for VHost if tracing is enabled there,
+%% or 'none'. The {ok, X} match crashes if the amq.rabbitmq.trace
+%% exchange is missing while tracing is on.
+init(VHost) ->
+    case enabled(VHost) of
+        false -> none;
+        true  -> {ok, X} = rabbit_exchange:lookup(
+                             rabbit_misc:r(VHost, exchange, ?XNAME)),
+                 X
+    end.
+
+-spec enabled(rabbit_types:vhost()) -> boolean().
+
+%% Tracing is on for a vhost iff it appears in the trace_vhosts app env.
+enabled(VHost) ->
+    {ok, VHosts} = application:get_env(rabbit, ?TRACE_VHOSTS),
+    lists:member(VHost, VHosts).
+
+-spec tap_in(rabbit_types:basic_message(), [rabbit_amqqueue:name()],
+             binary(), rabbit_channel:channel_number(),
+             rabbit_types:username(), state()) -> 'ok'.
+
+%% Publishes a "publish.<exchange>" trace message for an inbound
+%% message; no-op when tracing is off ('none' state).
+tap_in(_Msg, _QNames, _ConnName, _ChannelNum, _Username, none) -> ok;
+tap_in(Msg = #basic_message{exchange_name = #resource{name         = XName,
+                                                      virtual_host = VHost}},
+       QNames, ConnName, ChannelNum, Username, TraceX) ->
+    trace(TraceX, Msg, <<"publish">>, XName,
+          [{<<"vhost">>,         longstr,   VHost},
+           {<<"connection">>,    longstr,   ConnName},
+           {<<"channel">>,       signedint, ChannelNum},
+           {<<"user">>,          longstr,   Username},
+           {<<"routed_queues">>, array,
+            [{longstr, QName#resource.name} || QName <- QNames]}]).
+
+-spec tap_out(rabbit_amqqueue:qmsg(), binary(),
+              rabbit_channel:channel_number(),
+              rabbit_types:username(), state()) -> 'ok'.
+
+%% Publishes a "deliver.<queue>" trace message for an outbound
+%% delivery; 'redelivered' is encoded as 1/0 for table compatibility.
+tap_out(_Msg, _ConnName, _ChannelNum, _Username, none) -> ok;
+tap_out({#resource{name = QName, virtual_host = VHost},
+         _QPid, _QMsgId, Redelivered, Msg},
+        ConnName, ChannelNum, Username, TraceX) ->
+    RedeliveredNum = case Redelivered of true -> 1; false -> 0 end,
+    trace(TraceX, Msg, <<"deliver">>, QName,
+          [{<<"redelivered">>, signedint, RedeliveredNum},
+           {<<"vhost">>,       longstr,   VHost},
+           {<<"connection">>,  longstr,   ConnName},
+           {<<"channel">>,     signedint, ChannelNum},
+           {<<"user">>,        longstr,   Username}]).
+
+%%----------------------------------------------------------------------------
+
+-spec start(rabbit_types:vhost()) -> 'ok'.
+
+%% Enables tracing for VHost. The "-- [VHost]" first removes any
+%% existing entry so the vhost is never listed twice.
+start(VHost) ->
+    rabbit_log:info("Enabling tracing for vhost '~s'~n", [VHost]),
+    update_config(fun (VHosts) -> [VHost | VHosts -- [VHost]] end).
+
+-spec stop(rabbit_types:vhost()) -> 'ok'.
+
+stop(VHost) ->
+    rabbit_log:info("Disabling tracing for vhost '~s'~n", [VHost]),
+    update_config(fun (VHosts) -> VHosts -- [VHost] end).
+
+%% Applies Fun to the trace_vhosts app-env list, stores the result and
+%% tells local channels to re-read their config so the change takes
+%% effect immediately.
+update_config(Fun) ->
+    {ok, VHosts0} = application:get_env(rabbit, ?TRACE_VHOSTS),
+    VHosts = Fun(VHosts0),
+    application:set_env(rabbit, ?TRACE_VHOSTS, VHosts),
+    rabbit_channel:refresh_config_local(),
+    ok.
+
+%%----------------------------------------------------------------------------
+
+%% Publishes a trace message to the trace exchange with routing key
+%% "<prefix>.<suffix>". The first clause drops messages that were
+%% themselves published to the trace exchange, preventing a loop.
+trace(#exchange{name = Name}, #basic_message{exchange_name = Name},
+      _RKPrefix, _RKSuffix, _Extra) ->
+    ok;
+trace(X, Msg = #basic_message{content = #content{payload_fragments_rev = PFR}},
+      RKPrefix, RKSuffix, Extra) ->
+    ok = rabbit_basic:publish(
+           X, <<RKPrefix/binary, ".", RKSuffix/binary>>,
+           #'P_basic'{headers = msg_to_table(Msg) ++ Extra}, PFR),
+    ok.
+
+%% Converts a message's metadata into an AMQP table. The fold walks the
+%% #'P_basic'{} fields starting at tuple position 2 (position 1 is the
+%% record tag), skipping undefined properties.
+msg_to_table(#basic_message{exchange_name = #resource{name = XName},
+                            routing_keys  = RoutingKeys,
+                            content       = Content}) ->
+    #content{properties = Props} =
+        rabbit_binary_parser:ensure_content_decoded(Content),
+    {PropsTable, _Ix} =
+        lists:foldl(fun (K, {L, Ix}) ->
+                            V = element(Ix, Props),
+                            NewL = case V of
+                                       undefined -> L;
+                                       _         -> [{a2b(K), type(V), V} | L]
+                                   end,
+                            {NewL, Ix + 1}
+                    end, {[], 2}, record_info(fields, 'P_basic')),
+    [{<<"exchange_name">>, longstr, XName},
+     {<<"routing_keys">>,  array,   [{longstr, K} || K <- RoutingKeys]},
+     {<<"properties">>,    table,   PropsTable},
+     {<<"node">>,          longstr, a2b(node())}].
+
+%% Atom -> binary, used for header keys and the node name.
+a2b(A) -> list_to_binary(atom_to_list(A)).
+
+%% Maps an Erlang property value to its AMQP field type: lists become
+%% nested tables (e.g. the headers property), integers signedint,
+%% everything else a long string.
+type(V) when is_list(V)    -> table;
+type(V) when is_integer(V) -> signedint;
+type(_V)                   -> longstr.
diff --git a/deps/rabbit/src/rabbit_tracking.erl b/deps/rabbit/src/rabbit_tracking.erl
new file mode 100644
index 0000000000..a124d20226
--- /dev/null
+++ b/deps/rabbit/src/rabbit_tracking.erl
@@ -0,0 +1,103 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_tracking).
+
+%% Common behaviour and processing functions for tracking components
+%%
+%% See in use:
+%% * rabbit_connection_tracking
+%% * rabbit_channel_tracking
+
+-callback boot() -> ok.
+-callback update_tracked(term()) -> ok.
+-callback handle_cast(term()) -> ok.
+-callback register_tracked(
+ rabbit_types:tracked_connection() |
+ rabbit_types:tracked_channel()) -> 'ok'.
+-callback unregister_tracked(
+ rabbit_types:tracked_connection_id() |
+ rabbit_types:tracked_channel_id()) -> 'ok'.
+-callback count_tracked_items_in(term()) -> non_neg_integer().
+-callback clear_tracking_tables() -> 'ok'.
+-callback shutdown_tracked_items(list(), term()) -> ok.
+
+-export([id/2, count_tracked_items/4, match_tracked_items/2,
+ clear_tracking_table/1, delete_tracking_table/3,
+ delete_tracked_entry/3]).
+
+%%----------------------------------------------------------------------------
+
+-spec id(atom(), term()) ->
+    rabbit_types:tracked_connection_id() | rabbit_types:tracked_channel_id().
+
+%% A tracked item is identified by the pair {Node, Name}.
+id(Node, Name) -> {Node, Name}.
+
+-spec count_tracked_items(function(), integer(), term(), string()) ->
+    non_neg_integer().
+
+%% Sums a per-node counter across all running cluster nodes.
+%% TableNameFun maps a node to its tracking table; CountRecPosition is
+%% the tuple position of the counter within the stored record. A node
+%% whose table cannot be read is logged and counted as zero
+%% (best-effort by design).
+count_tracked_items(TableNameFun, CountRecPosition, Key, ContextMsg) ->
+    lists:foldl(fun (Node, Acc) ->
+                        Tab = TableNameFun(Node),
+                        try
+                            N = case mnesia:dirty_read(Tab, Key) of
+                                    [] -> 0;
+                                    [Val] ->
+                                        element(CountRecPosition, Val)
+                                end,
+                            Acc + N
+                        catch _:Err ->
+                                rabbit_log:error(
+                                  "Failed to fetch number of ~p ~p on node ~p:~n~p~n",
+                                  [ContextMsg, Key, Node, Err]),
+                                Acc
+                        end
+                end, 0, rabbit_nodes:all_running()).
+
+-spec match_tracked_items(function(), tuple()) -> term().
+
+%% Collects all records matching MatchSpec from every running node's
+%% tracking table (dirty reads, results concatenated).
+match_tracked_items(TableNameFun, MatchSpec) ->
+    lists:foldl(
+      fun (Node, Acc) ->
+              Tab = TableNameFun(Node),
+              Acc ++ mnesia:dirty_match_object(
+                       Tab,
+                       MatchSpec)
+      end, [], rabbit_nodes:all_running()).
+
+-spec clear_tracking_table(atom()) -> ok.
+
+%% Best-effort clear: failure to clear (e.g. table absent) is ignored.
+clear_tracking_table(TableName) ->
+    case mnesia:clear_table(TableName) of
+        {atomic, ok} -> ok;
+        {aborted, _} -> ok
+    end.
+
+-spec delete_tracking_table(atom(), node(), string()) -> ok.
+
+%% Deletes a node's tracking table; a missing table is fine, any other
+%% failure is logged but still returns ok (best-effort cleanup).
+delete_tracking_table(TableName, Node, ContextMsg) ->
+    case mnesia:delete_table(TableName) of
+        {atomic, ok}              -> ok;
+        {aborted, {no_exists, _}} -> ok;
+        {aborted, Error} ->
+            rabbit_log:error("Failed to delete a ~p table for node ~p: ~p",
+                             [ContextMsg, Node, Error]),
+            ok
+    end.
+
+-spec delete_tracked_entry({atom(), atom(), list()}, function(), term()) -> ok.
+
+%% Removes Key from every node's tracking table, but only when the MFA
+%% existence check reports the tracked item gone on all cluster nodes.
+%% NOTE(review): rpc:call/4 can return {badrpc, _}, which is not a
+%% boolean — lists:any/2 would then raise badarg; confirm the MFA/rpc
+%% failure mode is acceptable to callers.
+delete_tracked_entry(_ExistsCheckSpec = {M, F, A}, TableNameFun, Key) ->
+    ClusterNodes = rabbit_nodes:all_running(),
+    ExistsInCluster =
+        lists:any(fun(Node) -> rpc:call(Node, M, F, A) end, ClusterNodes),
+    case ExistsInCluster of
+        false ->
+            [mnesia:dirty_delete(TableNameFun(Node), Key) || Node <- ClusterNodes];
+        true ->
+            ok
+    end.
diff --git a/deps/rabbit/src/rabbit_upgrade.erl b/deps/rabbit/src/rabbit_upgrade.erl
new file mode 100644
index 0000000000..b1b128fecc
--- /dev/null
+++ b/deps/rabbit/src/rabbit_upgrade.erl
@@ -0,0 +1,314 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_upgrade).
+
+-export([maybe_upgrade_mnesia/0, maybe_upgrade_local/0,
+ maybe_migrate_queues_to_per_vhost_storage/0,
+ nodes_running/1, secondary_upgrade/1]).
+
+-include("rabbit.hrl").
+
+-define(VERSION_FILENAME, "schema_version").
+-define(LOCK_FILENAME, "schema_upgrade_lock").
+
+%% -------------------------------------------------------------------
+
+%% The upgrade logic is quite involved, due to the existence of
+%% clusters.
+%%
+%% Firstly, we have two different types of upgrades to do: Mnesia and
+%% everything else. Mnesia upgrades must only be done by one node in
+%% the cluster (we treat a non-clustered node as a single-node
+%% cluster). This is the primary upgrader. The other upgrades need to
+%% be done by all nodes.
+%%
+%% The primary upgrader has to start first (and do its Mnesia
+%% upgrades). Secondary upgraders need to reset their Mnesia database
+%% and then rejoin the cluster. They can't do the Mnesia upgrades as
+%% well and then merge databases since the cookie for each table will
+%% end up different and the merge will fail.
+%%
+%% This in turn means that we need to determine whether we are the
+%% primary or secondary upgrader *before* Mnesia comes up. If we
+%% didn't then the secondary upgrader would try to start Mnesia, and
+%% either hang waiting for a node which is not yet up, or fail since
+%% its schema differs from the other nodes in the cluster.
+%%
+%% Also, the primary upgrader needs to start Mnesia to do its
+%% upgrades, but needs to forcibly load tables rather than wait for
+%% them (in case it was not the last node to shut down, in which case
+%% it would wait forever).
+%%
+%% This in turn means that maybe_upgrade_mnesia/0 has to be patched
+%% into the boot process by prelaunch before the mnesia application is
+%% started. By the time Mnesia is started the upgrades have happened
+%% (on the primary), or Mnesia has been reset (on the secondary) and
+%% rabbit_mnesia:init_db_unchecked/2 can then make the node rejoin the cluster
+%% in the normal way.
+%%
+%% The non-mnesia upgrades are then triggered by
+%% rabbit_mnesia:init_db_unchecked/2. Of course, it's possible for a given
+%% upgrade process to only require Mnesia upgrades, or only require
+%% non-Mnesia upgrades. In the latter case no Mnesia resets and
+%% reclusterings occur.
+%%
+%% The primary upgrader needs to be a disc node. Ideally we would like
+%% it to be the last disc node to shut down (since otherwise there's a
+%% risk of data loss). On each node we therefore record the disc nodes
+%% that were still running when we shut down. A disc node that knows
+%% other nodes were up when it shut down, or a ram node, will refuse
+%% to be the primary upgrader, and will thus not start when upgrades
+%% are needed.
+%%
+%% However, this is racy if several nodes are shut down at once. Since
+%% rabbit records the running nodes, and shuts down before mnesia, the
+%% race manifests as all disc nodes thinking they are not the primary
+%% upgrader. Therefore the user can remove the record of the last disc
+%% node to shut down to get things going again. This may lose any
+%% mnesia changes that happened after the node chosen as the primary
+%% upgrader was shut down.
+
+%% -------------------------------------------------------------------
+
+%% Takes a backup of the mnesia dir unless one already exists. The
+%% presence of the lock file means a previous upgrade crashed mid-way
+%% or is still running, so refuse to proceed.
+ensure_backup_taken() ->
+    case filelib:is_file(lock_filename()) of
+        false -> case filelib:is_dir(backup_dir()) of
+                     false -> ok = take_backup();
+                     _     -> ok
+                 end;
+        true  ->
+            rabbit_log:error("Found lock file at ~s.
+            Either previous upgrade is in progress or has failed.
+            Database backup path: ~s",
+            [lock_filename(), backup_dir()]),
+            throw({error, previous_upgrade_failed})
+    end.
+
+%% Copies the mnesia dir aside; throws if the copy fails since
+%% upgrading without a backup risks data loss.
+take_backup() ->
+    BackupDir = backup_dir(),
+    info("upgrades: Backing up mnesia dir to ~p~n", [BackupDir]),
+    case rabbit_mnesia:copy_db(BackupDir) of
+        ok         -> info("upgrades: Mnesia dir backed up to ~p~n",
+                           [BackupDir]);
+        {error, E} -> throw({could_not_back_up_mnesia_dir, E, BackupDir})
+    end.
+
+%% Deletes the backup dir once all upgrade scopes have completed.
+ensure_backup_removed() ->
+    case filelib:is_dir(backup_dir()) of
+        true -> ok = remove_backup();
+        _    -> ok
+    end.
+
+remove_backup() ->
+    ok = rabbit_file:recursive_delete([backup_dir()]),
+    info("upgrades: Mnesia backup removed~n", []).
+
+-spec maybe_upgrade_mnesia() -> 'ok'.
+
+%% Entry point patched into the boot sequence before Mnesia starts
+%% (see module comment). Decides whether any upgrade scope has pending
+%% steps; if so, takes a backup and runs the mnesia-scope upgrades.
+maybe_upgrade_mnesia() ->
+    AllNodes = rabbit_mnesia:cluster_nodes(all),
+    ok = rabbit_mnesia_rename:maybe_finish(AllNodes),
+    %% Mnesia upgrade is the first upgrade scope,
+    %% so we should create a backup here if there are any upgrades
+    case rabbit_version:all_upgrades_required([mnesia, local, message_store]) of
+        {error, starting_from_scratch} ->
+            ok;
+        {error, version_not_available} ->
+            case AllNodes of
+                [] -> die("Cluster upgrade needed but upgrading from "
+                          "< 2.1.1.~nUnfortunately you will need to "
+                          "rebuild the cluster.", []);
+                _  -> ok
+            end;
+        {error, _} = Err ->
+            throw(Err);
+        {ok, []} ->
+            ok;
+        {ok, Upgrades} ->
+            ensure_backup_taken(),
+            run_mnesia_upgrades(proplists:get_value(mnesia, Upgrades, []),
+                                AllNodes)
+    end.
+
+%% Primary node applies the upgrades; secondaries instead reset their
+%% database and rejoin the cluster (see module comment for why).
+run_mnesia_upgrades([], _) -> ok;
+run_mnesia_upgrades(Upgrades, AllNodes) ->
+    case upgrade_mode(AllNodes) of
+        primary   -> primary_upgrade(Upgrades, AllNodes);
+        secondary -> secondary_upgrade(AllNodes)
+    end.
+
+%% Decides whether this node is the primary or a secondary upgrader.
+%% Primary: a disc node that was the last disc node to shut down (no
+%% cluster nodes running, nothing recorded as running after us).
+%% Secondary: some node is already running with a matching desired
+%% mnesia-scope version. Every other situation is fatal (die/2).
+upgrade_mode(AllNodes) ->
+    case nodes_running(AllNodes) of
+        [] ->
+            AfterUs = rabbit_nodes:all_running() -- [node()],
+            case {node_type_legacy(), AfterUs} of
+                {disc, []} ->
+                    primary;
+                {disc, _} ->
+                    Filename = rabbit_node_monitor:running_nodes_filename(),
+                    die("Cluster upgrade needed but other disc nodes shut "
+                        "down after this one.~nPlease first start the last "
+                        "disc node to shut down.~n~nNote: if several disc "
+                        "nodes were shut down simultaneously they may "
+                        "all~nshow this message. In which case, remove "
+                        "the lock file on one of them and~nstart that node. "
+                        "The lock file on this node is:~n~n ~s ", [Filename]);
+                {ram, _} ->
+                    die("Cluster upgrade needed but this is a ram node.~n"
+                        "Please first start the last disc node to shut down.",
+                        [])
+            end;
+        [Another|_] ->
+            MyVersion = rabbit_version:desired_for_scope(mnesia),
+            case rpc:call(Another, rabbit_version, desired_for_scope,
+                          [mnesia]) of
+                {badrpc, {'EXIT', {undef, _}}} ->
+                    die_because_cluster_upgrade_needed(unknown_old_version,
+                                                       MyVersion);
+                {badrpc, Reason} ->
+                    die_because_cluster_upgrade_needed({unknown, Reason},
+                                                       MyVersion);
+                CV -> case rabbit_version:matches(
+                             MyVersion, CV) of
+                          true  -> secondary;
+                          false -> die_because_cluster_upgrade_needed(
+                                     CV, MyVersion)
+                      end
+            end
+    end.
+
+-spec die_because_cluster_upgrade_needed(any(), any()) -> no_return().
+
+die_because_cluster_upgrade_needed(ClusterVersion, MyVersion) ->
+    %% The other node(s) are running an
+    %% unexpected version.
+    die("Cluster upgrade needed but other nodes are "
+        "running ~p~nand I want ~p",
+        [ClusterVersion, MyVersion]).
+
+-spec die(string(), list()) -> no_return().
+
+%% Logs and prints the message, then halts (or throws when
+%% halt_on_upgrade_failure is explicitly set to false).
+die(Msg, Args) ->
+    %% We don't throw or exit here since that gets thrown
+    %% straight out into do_boot, generating an erl_crash.dump
+    %% and displaying any error message in a confusing way.
+    rabbit_log:error(Msg, Args),
+    Str = rabbit_misc:format(
+            "~n~n****~n~n" ++ Msg ++ "~n~n****~n~n~n", Args),
+    io:format(Str),
+    error_logger:logfile(close),
+    case application:get_env(rabbit, halt_on_upgrade_failure) of
+        {ok, false} -> throw({upgrade_error, Str});
+        _           -> halt(1) %% i.e. true or undefined
+    end.
+
+%% Runs the mnesia-scope upgrades on the primary node. Tables are
+%% force-loaded (we may not have been the last node to stop), and the
+%% other nodes' schema copies are deleted so they must rejoin.
+primary_upgrade(Upgrades, Nodes) ->
+    Others = Nodes -- [node()],
+    ok = apply_upgrades(
+           mnesia,
+           Upgrades,
+           fun () ->
+                   rabbit_table:force_load(),
+                   case Others of
+                       [] -> ok;
+                       _  -> info("mnesia upgrades: Breaking cluster~n", []),
+                             [{atomic, ok} = mnesia:del_table_copy(schema, Node)
+                              || Node <- Others]
+                   end
+           end),
+    ok.
+
+%% A secondary upgrader wipes its own schema and rejoins the cluster,
+%% inheriting the already-upgraded tables from the primary.
+secondary_upgrade(AllNodes) ->
+    %% must do this before we wipe out schema
+    NodeType = node_type_legacy(),
+    rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
+                          cannot_delete_schema),
+    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+    ok = rabbit_mnesia:init_db_unchecked(AllNodes, NodeType),
+    ok = rabbit_version:record_desired_for_scope(mnesia),
+    ok.
+
+%% Subset of Nodes on which the rabbit application is running.
+nodes_running(Nodes) ->
+    [N || N <- Nodes, rabbit:is_running(N)].
+
+%% -------------------------------------------------------------------
+
+-spec maybe_upgrade_local() ->
+          'ok' |
+          'version_not_available' |
+          'starting_from_scratch'.
+
+%% Runs the local-scope upgrades if any are pending. Mnesia is stopped
+%% first (apply_upgrades restarts it); when nothing is pending the
+%% upgrade backup can be removed.
+maybe_upgrade_local() ->
+    case rabbit_version:upgrades_required(local) of
+        {error, version_not_available} -> version_not_available;
+        {error, starting_from_scratch} -> starting_from_scratch;
+        {error, _} = Err               -> throw(Err);
+        {ok, []}                       -> ensure_backup_removed(),
+                                          ok;
+        {ok, Upgrades}                 -> mnesia:stop(),
+                                          ok = apply_upgrades(local, Upgrades,
+                                                              fun () -> ok end),
+                                          ok
+    end.
+
+%% -------------------------------------------------------------------
+
+%% Runs the message_store-scope upgrades. This scope runs last, so the
+%% backup is removed unconditionally afterwards.
+maybe_migrate_queues_to_per_vhost_storage() ->
+    Result = case rabbit_version:upgrades_required(message_store) of
+                 {error, version_not_available} -> version_not_available;
+                 {error, starting_from_scratch} ->
+                     starting_from_scratch;
+                 {error, _} = Err               -> throw(Err);
+                 {ok, []}                       -> ok;
+                 {ok, Upgrades}                 -> apply_upgrades(message_store,
+                                                                  Upgrades,
+                                                                  fun() -> ok end),
+                                                   ok
+             end,
+    %% Message store upgrades should be
+    %% the last group.
+    %% Backup can be deleted here.
+    ensure_backup_removed(),
+    Result.
+
+%% -------------------------------------------------------------------
+
+%% Applies all upgrade steps for a scope under the protection of a lock
+%% file: take the lock, start Mnesia, run the scope-specific preamble
+%% Fun, apply each {Module, Function} step in order, record the new
+%% version, and only then release the lock. A crash leaves the lock in
+%% place so ensure_backup_taken/0 will refuse the next attempt.
+apply_upgrades(Scope, Upgrades, Fun) ->
+    ok = rabbit_file:lock_file(lock_filename()),
+    info("~s upgrades: ~w to apply~n", [Scope, length(Upgrades)]),
+    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+    Fun(),
+    [apply_upgrade(Scope, Upgrade) || Upgrade <- Upgrades],
+    info("~s upgrades: All upgrades applied successfully~n", [Scope]),
+    ok = rabbit_version:record_desired_for_scope(Scope),
+    ok = file:delete(lock_filename()).
+
+%% A single upgrade step is a nullary M:F() that must return ok.
+apply_upgrade(Scope, {M, F}) ->
+    info("~s upgrades: Applying ~w:~w~n", [Scope, M, F]),
+    ok = apply(M, F, []).
+
+%% -------------------------------------------------------------------
+
+%% Path helpers for the mnesia dir, the upgrade lock file and the
+%% backup directory.
+dir() -> rabbit_mnesia:dir().
+
+lock_filename()    -> lock_filename(dir()).
+lock_filename(Dir) -> filename:join(Dir, ?LOCK_FILENAME).
+backup_dir()       -> dir() ++ "-upgrade-backup".
+
+%% Infers disc vs ram node type from the presence of an on-disk table
+%% file, since neither Mnesia nor the config can be consulted here.
+node_type_legacy() ->
+    %% This is pretty ugly but we can't start Mnesia and ask it (will
+    %% hang), we can't look at the config file (may not include us
+    %% even if we're a disc node). We also can't use
+    %% rabbit_mnesia:node_type/0 because that will give false
+    %% positives on Rabbit up to 2.5.1.
+    case filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")) of
+        true  -> disc;
+        false -> ram
+    end.
+
+info(Msg, Args) -> rabbit_log:info(Msg, Args).
diff --git a/deps/rabbit/src/rabbit_upgrade_functions.erl b/deps/rabbit/src/rabbit_upgrade_functions.erl
new file mode 100644
index 0000000000..59417c72bb
--- /dev/null
+++ b/deps/rabbit/src/rabbit_upgrade_functions.erl
@@ -0,0 +1,662 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_upgrade_functions).
+
+%% If you are tempted to add include("rabbit.hrl"). here, don't. Using record
+%% defs here leads to pain later.
+
+-compile([nowarn_export_all, export_all]).
+
+-rabbit_upgrade({remove_user_scope, mnesia, []}).
+-rabbit_upgrade({hash_passwords, mnesia, []}).
+-rabbit_upgrade({add_ip_to_listener, mnesia, []}).
+-rabbit_upgrade({add_opts_to_listener, mnesia, [add_ip_to_listener]}).
+-rabbit_upgrade({internal_exchanges, mnesia, []}).
+-rabbit_upgrade({user_to_internal_user, mnesia, [hash_passwords]}).
+-rabbit_upgrade({topic_trie, mnesia, []}).
+-rabbit_upgrade({semi_durable_route, mnesia, []}).
+-rabbit_upgrade({exchange_event_serial, mnesia, []}).
+-rabbit_upgrade({trace_exchanges, mnesia, [internal_exchanges]}).
+-rabbit_upgrade({user_admin_to_tags, mnesia, [user_to_internal_user]}).
+-rabbit_upgrade({ha_mirrors, mnesia, []}).
+-rabbit_upgrade({gm, mnesia, []}).
+-rabbit_upgrade({exchange_scratch, mnesia, [trace_exchanges]}).
+-rabbit_upgrade({mirrored_supervisor, mnesia, []}).
+-rabbit_upgrade({topic_trie_node, mnesia, []}).
+-rabbit_upgrade({runtime_parameters, mnesia, []}).
+-rabbit_upgrade({exchange_scratches, mnesia, [exchange_scratch]}).
+-rabbit_upgrade({policy, mnesia,
+ [exchange_scratches, ha_mirrors]}).
+-rabbit_upgrade({sync_slave_pids, mnesia, [policy]}).
+-rabbit_upgrade({no_mirror_nodes, mnesia, [sync_slave_pids]}).
+-rabbit_upgrade({gm_pids, mnesia, [no_mirror_nodes]}).
+-rabbit_upgrade({exchange_decorators, mnesia, [policy]}).
+-rabbit_upgrade({policy_apply_to, mnesia, [runtime_parameters]}).
+-rabbit_upgrade({queue_decorators, mnesia, [gm_pids]}).
+-rabbit_upgrade({internal_system_x, mnesia, [exchange_decorators]}).
+-rabbit_upgrade({cluster_name, mnesia, [runtime_parameters]}).
+-rabbit_upgrade({down_slave_nodes, mnesia, [queue_decorators]}).
+-rabbit_upgrade({queue_state, mnesia, [down_slave_nodes]}).
+-rabbit_upgrade({recoverable_slaves, mnesia, [queue_state]}).
+-rabbit_upgrade({policy_version, mnesia, [recoverable_slaves]}).
+-rabbit_upgrade({slave_pids_pending_shutdown, mnesia, [policy_version]}).
+-rabbit_upgrade({user_password_hashing, mnesia, [hash_passwords]}).
+-rabbit_upgrade({operator_policies, mnesia, [slave_pids_pending_shutdown, internal_system_x]}).
+-rabbit_upgrade({vhost_limits, mnesia, []}).
+-rabbit_upgrade({queue_vhost_field, mnesia, [operator_policies]}).
+-rabbit_upgrade({topic_permission, mnesia, []}).
+-rabbit_upgrade({queue_options, mnesia, [queue_vhost_field]}).
+-rabbit_upgrade({exchange_options, mnesia, [operator_policies]}).
+
+%% -------------------------------------------------------------------
+
+%% replaces vhost.dummy (used to avoid having a single-field record
+%% which Mnesia doesn't like) with vhost.limits (which is actually
+%% used)
+
+-spec vhost_limits() -> 'ok'.
+
+%% Renames the third vhost field to 'limits', resetting any old dummy
+%% value to 'undefined'.
+vhost_limits() ->
+    transform(
+      rabbit_vhost,
+      fun ({vhost, VHost, _Dummy}) ->
+              {vhost, VHost, undefined}
+      end,
+      [virtual_host, limits]).
+
+%% It's a bad idea to use records or record_info here, even for the
+%% destination form. Because in the future, the destination form of
+%% your current transform may not match the record any more, and it
+%% would be messy to have to go back and fix old transforms at that
+%% point.
+
+-spec remove_user_scope() -> 'ok'.
+
+%% Drops the obsolete 'scope' element from the permission tuple stored
+%% inside each user_permission record.
+remove_user_scope() ->
+    transform(
+      rabbit_user_permission,
+      fun ({user_permission, UV, {permission, _Scope, Conf, Write, Read}}) ->
+              {user_permission, UV, {permission, Conf, Write, Read}}
+      end,
+      [user_vhost, permission]).
+
+%% this is an early migration that hashes passwords using MD5,
+%% only relevant to those migrating from 2.1.1.
+%% all users created after in 3.6.0 or later will use SHA-256 (unless configured
+%% otherwise)
+
+-spec hash_passwords() -> 'ok'.
+
+%% Replaces each user's plaintext password with an MD5 hash and renames
+%% the field from 'password' to 'password_hash'.
+hash_passwords() ->
+    transform(
+      rabbit_user,
+      fun ({user, Username, Password, IsAdmin}) ->
+              Hash = rabbit_auth_backend_internal:hash_password(rabbit_password_hashing_md5, Password),
+              {user, Username, Hash, IsAdmin}
+      end,
+      [username, password_hash, is_admin]).
+
+-spec add_ip_to_listener() -> 'ok'.
+
+%% Inserts an ip_address field into listener records, defaulting every
+%% existing row to the wildcard address {0,0,0,0}.
+add_ip_to_listener() ->
+    transform(
+      rabbit_listener,
+      fun ({listener, Node, Protocol, Host, Port}) ->
+              {listener, Node, Protocol, Host, {0,0,0,0}, Port}
+      end,
+      [node, protocol, host, ip_address, port]).
+
+-spec add_opts_to_listener() -> 'ok'.
+
+%% Appends an empty 'opts' list to listener records (depends on
+%% add_ip_to_listener having run first, per the -rabbit_upgrade graph).
+add_opts_to_listener() ->
+    transform(
+      rabbit_listener,
+      fun ({listener, Node, Protocol, Host, IP, Port}) ->
+              {listener, Node, Protocol, Host, IP, Port, []}
+      end,
+      [node, protocol, host, ip_address, port, opts]).
+
+-spec internal_exchanges() -> 'ok'.
+
+%% Adds an 'internal' flag (default false) to exchange records in both
+%% the transient and durable exchange tables.
+internal_exchanges() ->
+    Tables = [rabbit_exchange, rabbit_durable_exchange],
+    AddInternalFun =
+        fun ({exchange, Name, Type, Durable, AutoDelete, Args}) ->
+                {exchange, Name, Type, Durable, AutoDelete, false, Args}
+        end,
+    [ ok = transform(T,
+                     AddInternalFun,
+                     [name, type, durable, auto_delete, internal, arguments])
+      || T <- Tables ],
+    ok.
+
+-spec user_to_internal_user() -> 'ok'.
+
+%% Renames the record tag from 'user' to 'internal_user' (fields are
+%% unchanged); note the transform/4 variant which also changes the
+%% table's record_name.
+user_to_internal_user() ->
+    transform(
+      rabbit_user,
+      fun({user, Username, PasswordHash, IsAdmin}) ->
+              {internal_user, Username, PasswordHash, IsAdmin}
+      end,
+      [username, password_hash, is_admin], internal_user).
+
+-spec topic_trie() -> 'ok'.
+
+%% Creates the two ordered_set tables backing the topic-exchange trie.
+topic_trie() ->
+    create(rabbit_topic_trie_edge, [{record_name, topic_trie_edge},
+                                    {attributes, [trie_edge, node_id]},
+                                    {type, ordered_set}]),
+    create(rabbit_topic_trie_binding, [{record_name, topic_trie_binding},
+                                       {attributes, [trie_binding, value]},
+                                       {type, ordered_set}]).
+
+-spec semi_durable_route() -> 'ok'.
+
+%% Creates the semi-durable route table (route records).
+semi_durable_route() ->
+    create(rabbit_semi_durable_route, [{record_name, route},
+                                       {attributes, [binding, value]}]).
+
+-spec exchange_event_serial() -> 'ok'.
+
+%% Creates the per-exchange event serial counter table.
+exchange_event_serial() ->
+    create(rabbit_exchange_serial, [{record_name, exchange_serial},
+                                    {attributes, [name, next]}]).
+
+-spec trace_exchanges() -> 'ok'.
+
+%% Declares the amq.rabbitmq.trace topic exchange in every vhost,
+%% using the minimal declare_exchange/2 helper below.
+trace_exchanges() ->
+    [declare_exchange(
+       rabbit_misc:r(VHost, exchange, <<"amq.rabbitmq.trace">>), topic) ||
+        VHost <- rabbit_vhost:list_names()],
+    ok.
+
+-spec user_admin_to_tags() -> 'ok'.
+
+%% Converts the boolean is_admin field into a tag list: admins become
+%% [administrator], everyone else [management].
+user_admin_to_tags() ->
+    transform(
+      rabbit_user,
+      fun({internal_user, Username, PasswordHash, true}) ->
+              {internal_user, Username, PasswordHash, [administrator]};
+         ({internal_user, Username, PasswordHash, false}) ->
+              {internal_user, Username, PasswordHash, [management]}
+      end,
+      [username, password_hash, tags], internal_user).
+
+-spec ha_mirrors() -> 'ok'.
+
+%% Adds mirroring fields to amqqueue records in both queue tables:
+%% slave_pids (empty list) and mirror_nodes (undefined).
+ha_mirrors() ->
+    Tables = [rabbit_queue, rabbit_durable_queue],
+    AddMirrorPidsFun =
+        fun ({amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid}) ->
+                {amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid,
+                 [], undefined}
+        end,
+    [ ok = transform(T,
+                     AddMirrorPidsFun,
+                     [name, durable, auto_delete, exclusive_owner, arguments,
+                      pid, slave_pids, mirror_nodes])
+      || T <- Tables ],
+    ok.
+
+-spec gm() -> 'ok'.
+
+%% Creates the gm_group membership table used by the GM (guaranteed
+%% multicast) machinery.
+gm() ->
+    create(gm_group, [{record_name, gm_group},
+                      {attributes, [name, version, members]}]).
+
+-spec exchange_scratch() -> 'ok'.
+
+%% Adds a single 'scratch' field (undefined) to exchange records in
+%% both exchange tables.
+exchange_scratch() ->
+    ok = exchange_scratch(rabbit_exchange),
+    ok = exchange_scratch(rabbit_durable_exchange).
+
+%% Per-table worker for exchange_scratch/0.
+exchange_scratch(Table) ->
+    transform(
+      Table,
+      fun ({exchange, Name, Type, Dur, AutoDel, Int, Args}) ->
+              {exchange, Name, Type, Dur, AutoDel, Int, Args, undefined}
+      end,
+      [name, type, durable, auto_delete, internal, arguments, scratch]).
+
+-spec mirrored_supervisor() -> 'ok'.
+
+%% Creates the table holding mirrored_supervisor child specs.
+mirrored_supervisor() ->
+    create(mirrored_sup_childspec,
+           [{record_name, mirrored_sup_childspec},
+            {attributes, [key, mirroring_pid, childspec]}]).
+
+-spec topic_trie_node() -> 'ok'.
+
+%% Creates the topic-trie node table (edge/binding counters per node).
+topic_trie_node() ->
+    create(rabbit_topic_trie_node,
+           [{record_name, topic_trie_node},
+            {attributes, [trie_node, edge_count, binding_count]},
+            {type, ordered_set}]).
+
+-spec runtime_parameters() -> 'ok'.
+
+%% Creates the runtime parameters table, disc-resident on this node.
+runtime_parameters() ->
+    create(rabbit_runtime_parameters,
+           [{record_name, runtime_parameters},
+            {attributes, [key, value]},
+            {disc_copies, [node()]}]).
+
+%% Converts the single 'scratch' field into a 'scratches' orddict.
+%% Only x-federation exchanges ever stored a scratch value; that value
+%% is moved under the 'federation' key.
+exchange_scratches() ->
+    ok = exchange_scratches(rabbit_exchange),
+    ok = exchange_scratches(rabbit_durable_exchange).
+
+%% Per-table worker for exchange_scratches/0.
+exchange_scratches(Table) ->
+    transform(
+      Table,
+      fun ({exchange, Name, Type = <<"x-federation">>, Dur, AutoDel, Int, Args,
+            Scratch}) ->
+              Scratches = orddict:store(federation, Scratch, orddict:new()),
+              {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches};
+          %% We assert here that nothing else uses the scratch mechanism ATM
+          ({exchange, Name, Type, Dur, AutoDel, Int, Args, undefined}) ->
+              {exchange, Name, Type, Dur, AutoDel, Int, Args, undefined}
+      end,
+      [name, type, durable, auto_delete, internal, arguments, scratches]).
+
+-spec policy() -> 'ok'.
+
+%% Adds a 'policy' field (undefined) to both exchange tables and both
+%% queue tables.
+policy() ->
+    ok = exchange_policy(rabbit_exchange),
+    ok = exchange_policy(rabbit_durable_exchange),
+    ok = queue_policy(rabbit_queue),
+    ok = queue_policy(rabbit_durable_queue).
+
+%% Appends policy=undefined to exchange records in Table.
+exchange_policy(Table) ->
+    transform(
+      Table,
+      fun ({exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches}) ->
+              {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches,
+               undefined}
+      end,
+      [name, type, durable, auto_delete, internal, arguments, scratches,
+       policy]).
+
+%% Appends policy=undefined to amqqueue records in Table.
+queue_policy(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name, Dur, AutoDel, Excl, Args, Pid, SPids, MNodes}) ->
+              {amqqueue, Name, Dur, AutoDel, Excl, Args, Pid, SPids, MNodes,
+               undefined}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid,
+       slave_pids, mirror_nodes, policy]).
+
+-spec sync_slave_pids() -> 'ok'.
+
+%% Inserts a sync_slave_pids field (empty list) between slave_pids and
+%% mirror_nodes in both queue tables.
+sync_slave_pids() ->
+    Tables = [rabbit_queue, rabbit_durable_queue],
+    AddSyncSlavesFun =
+        fun ({amqqueue, N, D, AD, Excl, Args, Pid, SPids, MNodes, Pol}) ->
+                {amqqueue, N, D, AD, Excl, Args, Pid, SPids, [], MNodes, Pol}
+        end,
+    [ok = transform(T, AddSyncSlavesFun,
+                    [name, durable, auto_delete, exclusive_owner, arguments,
+                     pid, slave_pids, sync_slave_pids, mirror_nodes, policy])
+     || T <- Tables],
+    ok.
+
+-spec no_mirror_nodes() -> 'ok'.
+
+%% Removes the now-unused mirror_nodes field from both queue tables.
+no_mirror_nodes() ->
+    Tables = [rabbit_queue, rabbit_durable_queue],
+    RemoveMirrorNodesFun =
+        fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, _MNodes, Pol}) ->
+                {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol}
+        end,
+    [ok = transform(T, RemoveMirrorNodesFun,
+                    [name, durable, auto_delete, exclusive_owner, arguments,
+                     pid, slave_pids, sync_slave_pids, policy])
+     || T <- Tables],
+    ok.
+
+-spec gm_pids() -> 'ok'.
+
+%% Appends a gm_pids field (empty list) to amqqueue records in both
+%% queue tables.
+gm_pids() ->
+    Tables = [rabbit_queue, rabbit_durable_queue],
+    AddGMPidsFun =
+        fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol}) ->
+                {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol, []}
+        end,
+    [ok = transform(T, AddGMPidsFun,
+                    [name, durable, auto_delete, exclusive_owner, arguments,
+                     pid, slave_pids, sync_slave_pids, policy, gm_pids])
+     || T <- Tables],
+    ok.
+
+-spec exchange_decorators() -> 'ok'.
+
+%% Appends a decorators field, initialised to {[], []}, to exchange
+%% records in both exchange tables.
+exchange_decorators() ->
+    ok = exchange_decorators(rabbit_exchange),
+    ok = exchange_decorators(rabbit_durable_exchange).
+
+%% Per-table worker for exchange_decorators/0.
+exchange_decorators(Table) ->
+    transform(
+      Table,
+      fun ({exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches,
+            Policy}) ->
+              {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches, Policy,
+               {[], []}}
+      end,
+      [name, type, durable, auto_delete, internal, arguments, scratches, policy,
+       decorators]).
+
+-spec policy_apply_to() -> 'ok'.
+
+%% Adds an explicit "apply-to" entry to every stored policy parameter,
+%% inferring the value from the policy's definition, then invalidates
+%% cached policies. Non-policy runtime parameters pass through intact.
+policy_apply_to() ->
+    transform(
+      rabbit_runtime_parameters,
+      fun ({runtime_parameters, Key = {_VHost, <<"policy">>, _Name}, Value}) ->
+              ApplyTo = apply_to(proplists:get_value(<<"definition">>, Value)),
+              {runtime_parameters, Key, [{<<"apply-to">>, ApplyTo} | Value]};
+          ({runtime_parameters, Key, Value}) ->
+              {runtime_parameters, Key, Value}
+      end,
+      [key, value]),
+    rabbit_policy:invalidate(),
+    ok.
+
+%% Chooses the apply-to target from which policy keys are present:
+%% federation-upstream-set implies exchanges, ha-mode implies queues,
+%% both or neither implies all.
+apply_to(Def) ->
+    case [proplists:get_value(K, Def) ||
+             K <- [<<"federation-upstream-set">>, <<"ha-mode">>]] of
+        [undefined, undefined] -> <<"all">>;
+        [_,         undefined] -> <<"exchanges">>;
+        [undefined, _]         -> <<"queues">>;
+        [_,         _]         -> <<"all">>
+    end.
+
+-spec queue_decorators() -> 'ok'.
+
+%% Appends a decorators field (empty list) to amqqueue records in both
+%% queue tables.
+queue_decorators() ->
+    ok = queue_decorators(rabbit_queue),
+    ok = queue_decorators(rabbit_durable_queue).
+
+%% Per-table worker for queue_decorators/0.
+queue_decorators(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+            Pid, SlavePids, SyncSlavePids, Policy, GmPids}) ->
+              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+               Pid, SlavePids, SyncSlavePids, Policy, GmPids, []}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, policy, gm_pids, decorators]).
+
+-spec internal_system_x() -> 'ok'.
+
+%% Marks all amq.rabbitmq.* durable exchanges as internal; any other
+%% exchange is left untouched.
+internal_system_x() ->
+    transform(
+      rabbit_durable_exchange,
+      fun ({exchange, Name = {resource, _, _, <<"amq.rabbitmq.", _/binary>>},
+            Type, Dur, AutoDel, _Int, Args, Scratches, Policy, Decorators}) ->
+              {exchange, Name, Type, Dur, AutoDel, true, Args, Scratches,
+               Policy, Decorators};
+          (X) ->
+              X
+      end,
+      [name, type, durable, auto_delete, internal, arguments, scratches, policy,
+       decorators]).
+
+-spec cluster_name() -> 'ok'.
+
+%% Migrates the old per-vhost federation "local-nodename" parameter to
+%% the global cluster_name runtime parameter, inside one transaction.
+cluster_name() ->
+    {atomic, ok} = mnesia:transaction(fun cluster_name_tx/0),
+    ok.
+
+%% Transaction body for cluster_name/0: copies the first
+%% local-nodename value to the cluster_name key, warns if several
+%% vhosts define one, then deletes all the old keys.
+cluster_name_tx() ->
+    %% mnesia:transform_table/4 does not let us delete records
+    T = rabbit_runtime_parameters,
+    mnesia:write_lock_table(T),
+    Ks = [K || {_VHost, <<"federation">>, <<"local-nodename">>} = K
+                   <- mnesia:all_keys(T)],
+    case Ks of
+        []     -> ok;
+        [K|Tl] -> [{runtime_parameters, _K, Name}] = mnesia:read(T, K, write),
+                  R = {runtime_parameters, cluster_name, Name},
+                  mnesia:write(T, R, write),
+                  case Tl of
+                      [] -> ok;
+                      _  -> {VHost, _, _} = K,
+                            error_logger:warning_msg(
+                              "Multiple local-nodenames found, picking '~s' "
+                              "from '~s' for cluster name~n", [Name, VHost])
+                  end
+    end,
+    [mnesia:delete(T, K, write) || K <- Ks],
+    ok.
+
+-spec down_slave_nodes() -> 'ok'.
+
+%% Inserts a down_slave_nodes field (empty list) between
+%% sync_slave_pids and policy in both queue tables.
+down_slave_nodes() ->
+    ok = down_slave_nodes(rabbit_queue),
+    ok = down_slave_nodes(rabbit_durable_queue).
+
+%% Per-table worker for down_slave_nodes/0.
+down_slave_nodes(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+            Pid, SlavePids, SyncSlavePids, Policy, GmPids, Decorators}) ->
+              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+               Pid, SlavePids, SyncSlavePids, [], Policy, GmPids, Decorators}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, down_slave_nodes, policy, gm_pids, decorators]).
+
+-spec queue_state() -> 'ok'.
+
+%% Appends a state field, initialised to 'live', to amqqueue records
+%% in both queue tables.
+queue_state() ->
+    ok = queue_state(rabbit_queue),
+    ok = queue_state(rabbit_durable_queue).
+
+%% Per-table worker for queue_state/0.
+queue_state(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+            Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators}) ->
+              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+               Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
+               live}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, down_slave_nodes, policy, gm_pids, decorators, state]).
+
+-spec recoverable_slaves() -> 'ok'.
+
+%% Renames the down_slave_nodes field to recoverable_slaves. The
+%% record shape is untouched (identity transform); only the attribute
+%% list changes.
+recoverable_slaves() ->
+    ok = recoverable_slaves(rabbit_queue),
+    ok = recoverable_slaves(rabbit_durable_queue).
+
+%% Per-table worker for recoverable_slaves/0.
+recoverable_slaves(Table) ->
+    transform(
+      Table, fun (Q) -> Q end, %% Don't change shape of record
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, recoverable_slaves, policy, gm_pids, decorators,
+       state]).
+
+%% Appends a policy_version counter, initialised to 0, to amqqueue
+%% records in both queue tables.
+policy_version() ->
+    ok = policy_version(rabbit_queue),
+    ok = policy_version(rabbit_durable_queue).
+
+%% Per-table worker for policy_version/0.
+policy_version(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+            Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
+            State}) ->
+              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+               Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
+               State, 0}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, recoverable_slaves, policy, gm_pids, decorators, state,
+       policy_version]).
+
+%% Appends a slave_pids_pending_shutdown field (empty list) to
+%% amqqueue records in both queue tables.
+slave_pids_pending_shutdown() ->
+    ok = slave_pids_pending_shutdown(rabbit_queue),
+    ok = slave_pids_pending_shutdown(rabbit_durable_queue).
+
+%% Per-table worker for slave_pids_pending_shutdown/0.
+slave_pids_pending_shutdown(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+            Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
+            State, PolicyVersion}) ->
+              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+               Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
+               State, PolicyVersion, []}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, recoverable_slaves, policy, gm_pids, decorators, state,
+       policy_version, slave_pids_pending_shutdown]).
+
+-spec operator_policies() -> 'ok'.
+
+%% Adds an operator_policy field (undefined) to exchange and queue
+%% records in all four tables.
+operator_policies() ->
+    ok = exchange_operator_policies(rabbit_exchange),
+    ok = exchange_operator_policies(rabbit_durable_exchange),
+    ok = queue_operator_policies(rabbit_queue),
+    ok = queue_operator_policies(rabbit_durable_queue).
+
+%% Inserts operator_policy=undefined between policy and decorators in
+%% exchange records.
+exchange_operator_policies(Table) ->
+    transform(
+      Table,
+      fun ({exchange, Name, Type, Dur, AutoDel, Internal,
+            Args, Scratches, Policy, Decorators}) ->
+              {exchange, Name, Type, Dur, AutoDel, Internal,
+               Args, Scratches, Policy, undefined, Decorators}
+      end,
+      [name, type, durable, auto_delete, internal, arguments, scratches, policy,
+       operator_policy, decorators]).
+
+%% Inserts operator_policy=undefined between policy and gm_pids in
+%% amqqueue records.
+queue_operator_policies(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+            Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
+            State, PolicyVersion, SlavePidsPendingShutdown}) ->
+              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+               Pid, SlavePids, SyncSlavePids, DSN, Policy, undefined, GmPids,
+               Decorators, State, PolicyVersion, SlavePidsPendingShutdown}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, recoverable_slaves, policy, operator_policy,
+       gm_pids, decorators, state, policy_version, slave_pids_pending_shutdown]).
+
+-spec queue_vhost_field() -> 'ok'.
+
+%% Appends a vhost field to amqqueue records, extracted from the
+%% queue's resource name, and indexes both queue tables on it.
+queue_vhost_field() ->
+    ok = queue_vhost_field(rabbit_queue),
+    ok = queue_vhost_field(rabbit_durable_queue),
+    {atomic, ok} = mnesia:add_table_index(rabbit_queue, vhost),
+    {atomic, ok} = mnesia:add_table_index(rabbit_durable_queue, vhost),
+    ok.
+
+%% Per-table worker for queue_vhost_field/0; VHost is taken from the
+%% {resource, VHost, queue, _} name already stored in the record.
+queue_vhost_field(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name = {resource, VHost, queue, _QName}, Durable, AutoDelete, ExclusiveOwner, Arguments,
+            Pid, SlavePids, SyncSlavePids, DSN, Policy, OperatorPolicy, GmPids, Decorators,
+            State, PolicyVersion, SlavePidsPendingShutdown}) ->
+              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+               Pid, SlavePids, SyncSlavePids, DSN, Policy, OperatorPolicy, GmPids, Decorators,
+               State, PolicyVersion, SlavePidsPendingShutdown, VHost}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, recoverable_slaves, policy, operator_policy,
+       gm_pids, decorators, state, policy_version, slave_pids_pending_shutdown, vhost]).
+
+-spec queue_options() -> 'ok'.
+
+%% Appends an options field (empty map) to amqqueue records in both
+%% queue tables.
+queue_options() ->
+    ok = queue_options(rabbit_queue),
+    ok = queue_options(rabbit_durable_queue),
+    ok.
+
+%% Per-table worker for queue_options/0.
+queue_options(Table) ->
+    transform(
+      Table,
+      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+            Pid, SlavePids, SyncSlavePids, DSN, Policy, OperatorPolicy, GmPids, Decorators,
+            State, PolicyVersion, SlavePidsPendingShutdown, VHost}) ->
+              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
+               Pid, SlavePids, SyncSlavePids, DSN, Policy, OperatorPolicy, GmPids, Decorators,
+               State, PolicyVersion, SlavePidsPendingShutdown, VHost, #{}}
+      end,
+      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
+       sync_slave_pids, recoverable_slaves, policy, operator_policy,
+       gm_pids, decorators, state, policy_version, slave_pids_pending_shutdown, vhost, options]).
+
+%% Prior to 3.6.0, passwords were hashed using MD5, this populates
+%% existing records with said default. Users created with 3.6.0+ will
+%% have internal_user.hashing_algorithm populated by the internal
+%% authn backend.
+
+-spec user_password_hashing() -> 'ok'.
+
+%% Appends hashing_algorithm=rabbit_password_hashing_md5 to every
+%% existing internal_user record.
+user_password_hashing() ->
+    transform(
+      rabbit_user,
+      fun ({internal_user, Username, Hash, Tags}) ->
+              {internal_user, Username, Hash, Tags, rabbit_password_hashing_md5}
+      end,
+      [username, password_hash, tags, hashing_algorithm]).
+
+-spec topic_permission() -> 'ok'.
+%% Creates the topic permission table, disc-resident on this node.
+topic_permission() ->
+    create(rabbit_topic_permission,
+           [{record_name, topic_permission},
+            {attributes, [topic_permission_key, permission]},
+            {disc_copies, [node()]}]).
+
+-spec exchange_options() -> 'ok'.
+
+%% Appends an options field (empty map) to exchange records in both
+%% exchange tables.
+exchange_options() ->
+    ok = exchange_options(rabbit_exchange),
+    ok = exchange_options(rabbit_durable_exchange).
+
+%% Per-table worker for exchange_options/0.
+exchange_options(Table) ->
+    transform(
+      Table,
+      fun ({exchange, Name, Type, Dur, AutoDel, Internal,
+            Args, Scratches, Policy, OperatorPolicy, Decorators}) ->
+              {exchange, Name, Type, Dur, AutoDel, Internal,
+               Args, Scratches, Policy, OperatorPolicy, Decorators, #{}}
+      end,
+      [name, type, durable, auto_delete, internal, arguments, scratches, policy,
+       operator_policy, decorators, options]).
+
+%%--------------------------------------------------------------------
+
+%% Waits for TableName to be available, then rewrites every record
+%% through Fun and installs FieldList as the new attribute list.
+%% Crashes (badmatch) if the mnesia transform fails.
+transform(TableName, Fun, FieldList) ->
+    rabbit_table:wait([TableName]),
+    {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList),
+    ok.
+
+%% As transform/3, but also changes the table's record name to
+%% NewRecordName.
+transform(TableName, Fun, FieldList, NewRecordName) ->
+    rabbit_table:wait([TableName]),
+    {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList,
+                                          NewRecordName),
+    ok.
+
+%% Creates a new mnesia table with the given definition, asserting
+%% success.
+create(Tab, TabDef) ->
+    rabbit_log:debug("Will create a schema table named '~s'", [Tab]),
+    {atomic, ok} = mnesia:create_table(Tab, TabDef),
+    ok.
+
+%% Dumb replacement for rabbit_exchange:declare that does not require
+%% the exchange type registry or worker pool to be running by dint of
+%% not validating anything and assuming the exchange type does not
+%% require serialisation. NB: this assumes the
+%% pre-exchange-scratch-space format
+declare_exchange(XName, Type) ->
+    X = {exchange, XName, Type, true, false, false, []},
+    ok = mnesia:dirty_write(rabbit_durable_exchange, X).
diff --git a/deps/rabbit/src/rabbit_upgrade_preparation.erl b/deps/rabbit/src/rabbit_upgrade_preparation.erl
new file mode 100644
index 0000000000..fc1de24610
--- /dev/null
+++ b/deps/rabbit/src/rabbit_upgrade_preparation.erl
@@ -0,0 +1,51 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_upgrade_preparation).
+
+-export([await_online_quorum_plus_one/1, await_online_synchronised_mirrors/1]).
+
+%%
+%% API
+%%
+
+-define(SAMPLING_INTERVAL, 200).
+
+%% Polls (every ?SAMPLING_INTERVAL ms, for up to Timeout ms) until no
+%% quorum queue is at minimum quorum. Returns true on success, false
+%% if the deadline expires first.
+await_online_quorum_plus_one(Timeout) ->
+    Iterations = ceil(Timeout / ?SAMPLING_INTERVAL),
+    do_await_safe_online_quorum(Iterations).
+
+
+%% Polls (every ?SAMPLING_INTERVAL ms, for up to Timeout ms) until no
+%% local mirrored classic queue lacks a synchronised mirror. Returns
+%% true on success, false if the deadline expires first.
+await_online_synchronised_mirrors(Timeout) ->
+    Iterations = ceil(Timeout / ?SAMPLING_INTERVAL),
+    do_await_online_synchronised_mirrors(Iterations).
+
+
+%%
+%% Implementation
+%%
+
+%% Retry loop: true once no quorum queue is at minimum quorum; false
+%% when the iteration budget is exhausted.
+do_await_safe_online_quorum(0) ->
+    false;
+do_await_safe_online_quorum(IterationsLeft) ->
+    case rabbit_quorum_queue:list_with_minimum_quorum() of
+        [] -> true;
+        List when is_list(List) ->
+            timer:sleep(?SAMPLING_INTERVAL),
+            do_await_safe_online_quorum(IterationsLeft - 1)
+    end.
+
+
+%% Retry loop: true once every local mirrored classic queue has a
+%% synchronised mirror; false when the iteration budget is exhausted.
+do_await_online_synchronised_mirrors(0) ->
+    false;
+do_await_online_synchronised_mirrors(IterationsLeft) ->
+    case rabbit_amqqueue:list_local_mirrored_classic_without_synchronised_mirrors() of
+        [] -> true;
+        List when is_list(List) ->
+            timer:sleep(?SAMPLING_INTERVAL),
+            do_await_online_synchronised_mirrors(IterationsLeft - 1)
+    end.
diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl
new file mode 100644
index 0000000000..cf6fa4a189
--- /dev/null
+++ b/deps/rabbit/src/rabbit_variable_queue.erl
@@ -0,0 +1,3015 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_variable_queue).
+
+-export([init/3, terminate/2, delete_and_terminate/2, delete_crashed/1,
+ purge/1, purge_acks/1,
+ publish/6, publish_delivered/5,
+ batch_publish/4, batch_publish_delivered/4,
+ discard/4, drain_confirmed/1,
+ dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2,
+ ackfold/4, fold/3, len/1, is_empty/1, depth/1,
+ set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1,
+ handle_pre_hibernate/1, resume/1, msg_rates/1,
+ info/2, invoke/3, is_duplicate/2, set_queue_mode/2,
+ zip_msgs_and_acks/4, multiple_routing_keys/0, handle_info/2]).
+
+-export([start/2, stop/1]).
+
+%% exported for testing only
+-export([start_msg_store/3, stop_msg_store/1, init/6]).
+
+-export([move_messages_to_vhost_store/0]).
+
+-export([migrate_queue/3, migrate_message/3, get_per_vhost_store_client/2,
+ get_global_store_client/1, log_upgrade_verbose/1,
+ log_upgrade_verbose/2]).
+
+-include_lib("stdlib/include/qlc.hrl").
+
+-define(QUEUE_MIGRATION_BATCH_SIZE, 100).
+-define(EMPTY_START_FUN_STATE, {fun (ok) -> finished end, ok}).
+
+%%----------------------------------------------------------------------------
+%% Messages, and their position in the queue, can be in memory or on
+%% disk, or both. Persistent messages will have both message and
+%% position pushed to disk as soon as they arrive; transient messages
+%% can be written to disk (and thus both types can be evicted from
+%% memory) under memory pressure. The question of whether a message is
+%% in RAM and whether it is persistent are orthogonal.
+%%
+%% Messages are persisted using the queue index and the message
+%% store. Normally the queue index holds the position of the message
+%% *within this queue* along with a couple of small bits of metadata,
+%% while the message store holds the message itself (including headers
+%% and other properties).
+%%
+%% However, as an optimisation, small messages can be embedded
+%% directly in the queue index and bypass the message store
+%% altogether.
+%%
+%% Definitions:
+%%
+%% alpha: this is a message where both the message itself, and its
+%% position within the queue are held in RAM
+%%
+%% beta: this is a message where the message itself is only held on
+%% disk (if persisted to the message store) but its position
+%% within the queue is held in RAM.
+%%
+%% gamma: this is a message where the message itself is only held on
+%% disk, but its position is both in RAM and on disk.
+%%
+%% delta: this is a collection of messages, represented by a single
+%% term, where the messages and their position are only held on
+%% disk.
+%%
+%% Note that for persistent messages, the message and its position
+%% within the queue are always held on disk, *in addition* to being in
+%% one of the above classifications.
+%%
+%% Also note that within this code, the term gamma seldom
+%% appears. It's frequently the case that gammas are defined as betas
+%% that have had their queue position recorded on disk.
+%%
+%% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though
+%% many of these steps are frequently skipped. q1 and q4 only hold
+%% alphas, q2 and q3 hold both betas and gammas. When a message
+%% arrives, its classification is determined. It is then added to the
+%% rightmost appropriate queue.
+%%
+%% If a new message is determined to be a beta or gamma, q1 is
+%% empty. If a new message is determined to be a delta, q1 and q2 are
+%% empty (and actually q4 too).
+%%
+%% When removing messages from a queue, if q4 is empty then q3 is read
+%% directly. If q3 becomes empty then the next segment's worth of
+%% messages from delta are read into q3, reducing the size of
+%% delta. If the queue is non empty, either q4 or q3 contain
+%% entries. It is never permitted for delta to hold all the messages
+%% in the queue.
+%%
+%% The duration indicated to us by the memory_monitor is used to
+%% calculate, given our current ingress and egress rates, how many
+%% messages we should hold in RAM (i.e. as alphas). We track the
+%% ingress and egress rates for both messages and pending acks and
+%% rates for both are considered when calculating the number of
+%% messages to hold in RAM. When we need to push alphas to betas or
+%% betas to gammas, we favour writing out messages that are further
+%% from the head of the queue. This minimises writes to disk, as the
+%% messages closer to the tail of the queue stay in the queue for
+%% longer, thus do not need to be replaced as quickly by sending other
+%% messages to disk.
+%%
+%% Whilst messages are pushed to disk and forgotten from RAM as soon
+%% as requested by a new setting of the queue RAM duration, the
+%% inverse is not true: we only load messages back into RAM as
+%% demanded as the queue is read from. Thus only publishes to the
+%% queue will take up available spare capacity.
+%%
+%% When we report our duration to the memory monitor, we calculate
+%% average ingress and egress rates over the last two samples, and
+%% then calculate our duration based on the sum of the ingress and
+%% egress rates. More than two samples could be used, but it's a
+%% balance between responding quickly enough to changes in
+%% producers/consumers versus ignoring temporary blips. The problem
+%% with temporary blips is that with just a few queues, they can have
+%% substantial impact on the calculation of the average duration and
+%% hence cause unnecessary I/O. Another alternative is to increase the
+%% amqqueue_process:RAM_DURATION_UPDATE_PERIOD to beyond 5
+%% seconds. However, that then runs the risk of being too slow to
+%% inform the memory monitor of changes. Thus a 5 second interval,
+%% plus a rolling average over the last two samples seems to work
+%% well in practice.
+%%
+%% The sum of the ingress and egress rates is used because the egress
+%% rate alone is not sufficient. Adding in the ingress rate means that
+%% queues which are being flooded by messages are given more memory,
+%% resulting in them being able to process the messages faster (by
+%% doing less I/O, or at least deferring it) and thus helping keep
+%% their mailboxes empty and thus the queue as a whole is more
+%% responsive. If such a queue also has fast but previously idle
+%% consumers, the consumer can then start to be driven as fast as it
+%% can go, whereas if only egress rate was being used, the incoming
+%% messages may have to be written to disk and then read back in,
+%% resulting in the hard disk being a bottleneck in driving the
+%% consumers. Generally, we want to give Rabbit every chance of
+%% getting rid of messages as fast as possible and remaining
+%% responsive, and using only the egress rate impacts that goal.
+%%
+%% Once the queue has more alphas than the target_ram_count, the
+%% surplus must be converted to betas, if not gammas, if not rolled
+%% into delta. The conditions under which these transitions occur
+%% reflect the conflicting goals of minimising RAM cost per msg, and
+%% minimising CPU cost per msg. Once the msg has become a beta, its
+%% payload is no longer in RAM, thus a read from the msg_store must
+%% occur before the msg can be delivered, but the RAM cost of a beta
+%% is the same as a gamma, so converting a beta to gamma will not free
+%% up any further RAM. To reduce the RAM cost further, the gamma must
+%% be rolled into delta. Whilst recovering a beta or a gamma to an
+%% alpha requires only one disk read (from the msg_store), recovering
+%% a msg from within delta will require two reads (queue_index and
+%% then msg_store). But delta has a near-0 per-msg RAM cost. So the
+%% conflict is between using delta more, which will free up more
+%% memory, but require additional CPU and disk ops, versus using delta
+%% less and gammas and betas more, which will cost more memory, but
+%% require fewer disk ops and less CPU overhead.
+%%
+%% In the case of a persistent msg published to a durable queue, the
+%% msg is immediately written to the msg_store and queue_index. If
+%% then additionally converted from an alpha, it'll immediately go to
+%% a gamma (as it's already in queue_index), and cannot exist as a
+%% beta. Thus a durable queue with a mixture of persistent and
+%% transient msgs in it which has more messages than permitted by the
+%% target_ram_count may contain an interspersed mixture of betas and
+%% gammas in q2 and q3.
+%%
+%% There is then a ratio that controls how many betas and gammas there
+%% can be. This is based on the target_ram_count and thus expresses
+%% the fact that as the number of permitted alphas in the queue falls,
+%% so should the number of betas and gammas fall (i.e. delta
+%% grows). If q2 and q3 contain more than the permitted number of
+%% betas and gammas, then the surplus are forcibly converted to gammas
+%% (as necessary) and then rolled into delta. The ratio is that
+%% delta/(betas+gammas+delta) equals
+%% (betas+gammas+delta)/(target_ram_count+betas+gammas+delta). I.e. as
+%% the target_ram_count shrinks to 0, so must betas and gammas.
+%%
+%% The conversion of betas to deltas is done if there are at least
+%% ?IO_BATCH_SIZE betas in q2 & q3. This value should not be too small,
+%% otherwise the frequent operations on the queues of q2 and q3 will not be
+%% effectively amortised (switching the direction of queue access defeats
+%% amortisation). Note that there is a natural upper bound due to credit_flow
+%% limits on the alpha to beta conversion.
+%%
+%% The conversion from alphas to betas is chunked due to the
+%% credit_flow limits of the msg_store. This further smooths the
+%% effects of changes to the target_ram_count and ensures the queue
+%% remains responsive even when there is a large amount of IO work to
+%% do. The 'resume' callback is utilised to ensure that conversions
+%% are done as promptly as possible whilst ensuring the queue remains
+%% responsive.
+%%
+%% In the queue we keep track of both messages that are pending
+%% delivery and messages that are pending acks. In the event of a
+%% queue purge, we only need to load qi segments if the queue has
+%% elements in deltas (i.e. it came under significant memory
+%% pressure). In the event of a queue deletion, in addition to the
+%% preceding, by keeping track of pending acks in RAM, we do not need
+%% to search through qi segments looking for messages that are yet to
+%% be acknowledged.
+%%
+%% Pending acks are recorded in memory by storing the message itself.
+%% If the message has been sent to disk, we do not store the message
+%% content. During memory reduction, pending acks containing message
+%% content have that content removed and the corresponding messages
+%% are pushed out to disk.
+%%
+%% Messages from pending acks are returned to q4, q3 and delta during
+%% requeue, based on the limits of seq_id contained in each. Requeued
+%% messages retain their original seq_id, maintaining order
+%% when requeued.
+%%
+%% The order in which alphas are pushed to betas and pending acks
+%% are pushed to disk is determined dynamically. We always prefer to
+%% push messages for the source (alphas or acks) that is growing the
+%% fastest (with growth measured as avg. ingress - avg. egress).
+%%
+%% Notes on Clean Shutdown
+%% (This documents behaviour in variable_queue, queue_index and
+%% msg_store.)
+%%
+%% In order to try to achieve as fast a start-up as possible, if a
+%% clean shutdown occurs, we try to save out state to disk to reduce
+%% work on startup. In the msg_store this takes the form of the
+%% index_module's state, plus the file_summary ets table, and client
+%% refs. In the VQ, this takes the form of the count of persistent
+%% messages in the queue and references into the msg_stores. The
+%% queue_index adds to these terms the details of its segments and
+%% stores the terms in the queue directory.
+%%
+%% Two message stores are used. One is created for persistent messages
+%% to durable queues that must survive restarts, and the other is used
+%% for all other messages that just happen to need to be written to
+%% disk. On start up we can therefore nuke the transient message
+%% store, and be sure that the messages in the persistent store are
+%% all that we need.
+%%
+%% The references to the msg_stores are there so that the msg_store
+%% knows to only trust its saved state if all of the queues it was
+%% previously talking to come up cleanly. Likewise, the queues
+%% themselves (esp queue_index) skips work in init if all the queues
+%% and msg_store were shutdown cleanly. This gives both good speed
+%% improvements and also robustness so that if anything possibly went
+%% wrong in shutdown (or there was subsequent manual tampering), all
+%% messages and queues that can be recovered are recovered, safely.
+%%
+%% To delete transient messages lazily, the variable_queue, on
+%% startup, stores the next_seq_id reported by the queue_index as the
+%% transient_threshold. From that point on, whenever it's reading a
+%% message off disk via the queue_index, if the seq_id is below this
+%% threshold and the message is transient then it drops the message
+%% (the message itself won't exist on disk because it would have been
+%% stored in the transient msg_store which would have had its saved
+%% state nuked on startup). This avoids the expensive operation of
+%% scanning the entire queue on startup in order to delete transient
+%% messages that were only pushed to disk to save memory.
+%%
+%%----------------------------------------------------------------------------
+
+-behaviour(rabbit_backing_queue).
+
+-record(vqstate,
+ { q1,
+ q2,
+ delta,
+ q3,
+ q4,
+ next_seq_id,
+ ram_pending_ack, %% msgs using store, still in RAM
+ disk_pending_ack, %% msgs in store, paged out
+ qi_pending_ack, %% msgs using qi, *can't* be paged out
+ index_state,
+ msg_store_clients,
+ durable,
+ transient_threshold,
+ qi_embed_msgs_below,
+
+ len, %% w/o unacked
+ bytes, %% w/o unacked
+ unacked_bytes,
+ persistent_count, %% w unacked
+ persistent_bytes, %% w unacked
+ delta_transient_bytes, %%
+
+ target_ram_count,
+ ram_msg_count, %% w/o unacked
+ ram_msg_count_prev,
+ ram_ack_count_prev,
+ ram_bytes, %% w unacked
+ out_counter,
+ in_counter,
+ rates,
+ msgs_on_disk,
+ msg_indices_on_disk,
+ unconfirmed,
+ confirmed,
+ ack_out_counter,
+ ack_in_counter,
+ %% Unlike the other counters these two do not feed into
+ %% #rates{} and get reset
+ disk_read_count,
+ disk_write_count,
+
+ io_batch_size,
+
+ %% default queue or lazy queue
+ mode,
+ %% number of reduce_memory_usage executions, once it
+ %% reaches a threshold the queue will manually trigger a runtime GC
+ %% see: maybe_execute_gc/1
+ memory_reduction_run_count,
+ %% Queue data is grouped by VHost. We need to store it
+ %% to work with queue index.
+ virtual_host,
+ waiting_bump = false
+ }).
+
+-record(rates, { in, out, ack_in, ack_out, timestamp }).
+
+-record(msg_status,
+ { seq_id,
+ msg_id,
+ msg,
+ is_persistent,
+ is_delivered,
+ msg_in_store,
+ index_on_disk,
+ persist_to,
+ msg_props
+ }).
+
+-record(delta,
+ { start_seq_id, %% start_seq_id is inclusive
+ count,
+ transient,
+ end_seq_id %% end_seq_id is exclusive
+ }).
+
+-define(HEADER_GUESS_SIZE, 100). %% see determine_persist_to/2
+-define(PERSISTENT_MSG_STORE, msg_store_persistent).
+-define(TRANSIENT_MSG_STORE, msg_store_transient).
+
+-define(QUEUE, lqueue).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include_lib("rabbit_common/include/rabbit_framing.hrl").
+-include("amqqueue.hrl").
+
+%%----------------------------------------------------------------------------
+
+-rabbit_upgrade({multiple_routing_keys, local, []}).
+-rabbit_upgrade({move_messages_to_vhost_store, message_store, []}).
+
+-type seq_id() :: non_neg_integer().
+
+-type rates() :: #rates { in :: float(),
+ out :: float(),
+ ack_in :: float(),
+ ack_out :: float(),
+ timestamp :: rabbit_types:timestamp()}.
+
+-type delta() :: #delta { start_seq_id :: non_neg_integer(),
+ count :: non_neg_integer(),
+ end_seq_id :: non_neg_integer() }.
+
+%% The compiler (rightfully) complains that ack() and state() are
+%% unused. For this reason we duplicate a -spec from
+%% rabbit_backing_queue with the only intent being to remove
+%% warnings. The problem here is that we can't parameterise the BQ
+%% behaviour by these two types as we would like to. We still leave
+%% these here for documentation purposes.
+-type ack() :: seq_id().
+-type state() :: #vqstate {
+ q1 :: ?QUEUE:?QUEUE(),
+ q2 :: ?QUEUE:?QUEUE(),
+ delta :: delta(),
+ q3 :: ?QUEUE:?QUEUE(),
+ q4 :: ?QUEUE:?QUEUE(),
+ next_seq_id :: seq_id(),
+ ram_pending_ack :: gb_trees:tree(),
+ disk_pending_ack :: gb_trees:tree(),
+ qi_pending_ack :: gb_trees:tree(),
+ index_state :: any(),
+ msg_store_clients :: 'undefined' | {{any(), binary()},
+ {any(), binary()}},
+ durable :: boolean(),
+ transient_threshold :: non_neg_integer(),
+ qi_embed_msgs_below :: non_neg_integer(),
+
+ len :: non_neg_integer(),
+ bytes :: non_neg_integer(),
+ unacked_bytes :: non_neg_integer(),
+
+ persistent_count :: non_neg_integer(),
+ persistent_bytes :: non_neg_integer(),
+
+ target_ram_count :: non_neg_integer() | 'infinity',
+ ram_msg_count :: non_neg_integer(),
+ ram_msg_count_prev :: non_neg_integer(),
+ ram_ack_count_prev :: non_neg_integer(),
+ ram_bytes :: non_neg_integer(),
+ out_counter :: non_neg_integer(),
+ in_counter :: non_neg_integer(),
+ rates :: rates(),
+ msgs_on_disk :: gb_sets:set(),
+ msg_indices_on_disk :: gb_sets:set(),
+ unconfirmed :: gb_sets:set(),
+ confirmed :: gb_sets:set(),
+ ack_out_counter :: non_neg_integer(),
+ ack_in_counter :: non_neg_integer(),
+ disk_read_count :: non_neg_integer(),
+ disk_write_count :: non_neg_integer(),
+
+ io_batch_size :: pos_integer(),
+ mode :: 'default' | 'lazy',
+ memory_reduction_run_count :: non_neg_integer()}.
+
+-define(BLANK_DELTA, #delta { start_seq_id = undefined,
+ count = 0,
+ transient = 0,
+ end_seq_id = undefined }).
+-define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z,
+ count = 0,
+ transient = 0,
+ end_seq_id = Z }).
+
+-define(MICROS_PER_SECOND, 1000000.0).
+
+%% We're sampling every 5s for RAM duration; a half life that is of
+%% the same order of magnitude is probably about right.
+-define(RATE_AVG_HALF_LIFE, 5.0).
+
+%% We will recalculate the #rates{} every time we get asked for our
+%% RAM duration, or every N messages published, whichever is
+%% sooner. We do this since the priority calculations in
+%% rabbit_amqqueue_process need fairly fresh rates.
+-define(MSGS_PER_RATE_CALC, 100).
+
+%% we define the garbage collector threshold
+%% it needs to tune the `reduce_memory_use` calls. Thus, the garbage collection.
+%% see: rabbitmq-server-973 and rabbitmq-server-964
+-define(DEFAULT_EXPLICIT_GC_RUN_OP_THRESHOLD, 1000).
+-define(EXPLICIT_GC_RUN_OP_THRESHOLD(Mode),
+ case get(explicit_gc_run_operation_threshold) of
+ undefined ->
+ Val = explicit_gc_run_operation_threshold_for_mode(Mode),
+ put(explicit_gc_run_operation_threshold, Val),
+ Val;
+ Val -> Val
+ end).
+
+%% Read the configured explicit-GC run threshold for the given queue
+%% mode (lazy queues have their own setting), falling back to the
+%% compile-time default when unset.
+explicit_gc_run_operation_threshold_for_mode(Mode) ->
+    Key = case Mode of
+              lazy -> lazy_queue_explicit_gc_run_operation_threshold;
+              _    -> queue_explicit_gc_run_operation_threshold
+          end,
+    rabbit_misc:get_env(rabbit, Key, ?DEFAULT_EXPLICIT_GC_RUN_OP_THRESHOLD).
+
+%%----------------------------------------------------------------------------
+%% Public API
+%%----------------------------------------------------------------------------
+
+%% Start the queue index for the vhost, then its message stores.
+%% Recovery terms saved by a clean shutdown carry a persistent_ref
+%% identifying each queue's persistent msg_store client; collect those
+%% refs so the persistent store can decide whether to trust its saved
+%% state.
+start(VHost, DurableQueues) ->
+    {AllTerms, StartFunState} = rabbit_queue_index:start(VHost, DurableQueues),
+    ClientRefs = lists:filtermap(
+                   fun (non_clean_shutdown) ->
+                           false;
+                       (Terms) ->
+                           case proplists:get_value(persistent_ref, Terms) of
+                               undefined -> false;
+                               Ref       -> {true, Ref}
+                           end
+                   end, AllTerms),
+    start_msg_store(VHost, ClientRefs, StartFunState),
+    {ok, AllTerms}.
+
+%% Stop the vhost's message stores and queue index, in the reverse
+%% order of start/2.
+stop(VHost) ->
+ ok = stop_msg_store(VHost),
+ ok = rabbit_queue_index:stop(VHost).
+
+%% Start both message stores for the vhost. The transient store always
+%% starts empty (?EMPTY_START_FUN_STATE) — its contents are never
+%% trusted across restarts. The persistent store gets the recovered
+%% client Refs (or 'undefined' after an unclean shutdown) plus
+%% StartFunState so it can validate and reuse its on-disk state.
+start_msg_store(VHost, Refs, StartFunState) when is_list(Refs); Refs == undefined ->
+ rabbit_log:info("Starting message stores for vhost '~s'~n", [VHost]),
+ do_start_msg_store(VHost, ?TRANSIENT_MSG_STORE, undefined, ?EMPTY_START_FUN_STATE),
+ do_start_msg_store(VHost, ?PERSISTENT_MSG_STORE, Refs, StartFunState),
+ ok.
+
+%% Start one message store (?TRANSIENT_MSG_STORE or
+%% ?PERSISTENT_MSG_STORE) for VHost via the per-vhost store API.
+%% Exits the caller on failure, since queues cannot operate without
+%% their stores.
+do_start_msg_store(VHost, Type, Refs, StartFunState) ->
+    case rabbit_vhost_msg_store:start(VHost, Type, Refs, StartFunState) of
+        {ok, _} ->
+            rabbit_log:info("Started message store of type ~s for vhost '~s'~n",
+                            [abbreviated_type(Type), VHost]);
+        {error, {no_such_vhost, VHost}} = Err ->
+            %% Log the abbreviated type name here too, so failure lines
+            %% match the success line above instead of printing the raw
+            %% internal store name.
+            rabbit_log:error("Failed to start message store of type ~s for vhost '~s': the vhost no longer exists!~n",
+                             [abbreviated_type(Type), VHost]),
+            exit(Err);
+        {error, Error} ->
+            rabbit_log:error("Failed to start message store of type ~s for vhost '~s': ~p~n",
+                             [abbreviated_type(Type), VHost, Error]),
+            exit({error, Error})
+    end.
+
+%% Short human-readable name for a message store type, used in log
+%% messages.
+abbreviated_type(?TRANSIENT_MSG_STORE) -> transient;
+abbreviated_type(?PERSISTENT_MSG_STORE) -> persistent.
+
+%% Stop both per-vhost message stores (transient first, matching the
+%% start order); always returns ok.
+stop_msg_store(VHost) ->
+    lists:foreach(
+      fun (Type) -> rabbit_vhost_msg_store:stop(VHost, Type) end,
+      [?TRANSIENT_MSG_STORE, ?PERSISTENT_MSG_STORE]),
+    ok.
+
+%% rabbit_backing_queue callback. Delegates to init/6, supplying the
+%% standard confirm-tracking callbacks: as messages and/or their index
+%% entries reach disk, the corresponding MsgIds are reported so
+%% publisher confirms can be issued.
+init(Queue, Recover, Callback) ->
+ init(
+ Queue, Recover, Callback,
+ fun (MsgIds, ActionTaken) ->
+ msgs_written_to_disk(Callback, MsgIds, ActionTaken)
+ end,
+ fun (MsgIds) -> msg_indices_written_to_disk(Callback, MsgIds) end,
+ fun (MsgIds) -> msgs_and_indices_written_to_disk(Callback, MsgIds) end).
+
+%% Fresh (non-recovering) initialisation: create an empty queue index
+%% and message store clients. Durable queues get a persistent store
+%% client as well; transient queues only ever use the transient store.
+init(Q, new, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueue(Q) ->
+ QueueName = amqqueue:get_name(Q),
+ IsDurable = amqqueue:is_durable(Q),
+ IndexState = rabbit_queue_index:init(QueueName,
+ MsgIdxOnDiskFun, MsgAndIdxOnDiskFun),
+ VHost = QueueName#resource.virtual_host,
+ init(IsDurable, IndexState, 0, 0, [],
+ case IsDurable of
+ true -> msg_store_client_init(?PERSISTENT_MSG_STORE,
+ MsgOnDiskFun, AsyncCallback, VHost),
+ false -> undefined
+ end,
+ msg_store_client_init(?TRANSIENT_MSG_STORE, undefined,
+ AsyncCallback, VHost), VHost);
+
+%% We can be recovering a transient queue if it crashed
+%% Recovery path: Terms is either the saved term list from a clean
+%% shutdown or the atom 'non_clean_shutdown'. The index is rebuilt
+%% with a ContainsCheckFun that decides whether a message referenced
+%% by the index still exists (for embedded messages, persistence alone
+%% decides).
+init(Q, Terms, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueue(Q) ->
+ QueueName = amqqueue:get_name(Q),
+ IsDurable = amqqueue:is_durable(Q),
+ {PRef, RecoveryTerms} = process_recovery_terms(Terms),
+ VHost = QueueName#resource.virtual_host,
+ {PersistentClient, ContainsCheckFun} =
+ case IsDurable of
+ true -> C = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef,
+ MsgOnDiskFun, AsyncCallback,
+ VHost),
+ {C, fun (MsgId) when is_binary(MsgId) ->
+ rabbit_msg_store:contains(MsgId, C);
+ (#basic_message{is_persistent = Persistent}) ->
+ Persistent
+ end};
+ false -> {undefined, fun(_MsgId) -> false end}
+ end,
+ TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE,
+ undefined, AsyncCallback,
+ VHost),
+ %% DeltaCount/DeltaBytes seed the on-disk (delta) message counts
+ %% recovered from the index.
+ {DeltaCount, DeltaBytes, IndexState} =
+ rabbit_queue_index:recover(
+ QueueName, RecoveryTerms,
+ rabbit_vhost_msg_store:successfully_recovered_state(
+ VHost,
+ ?PERSISTENT_MSG_STORE),
+ ContainsCheckFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun),
+ init(IsDurable, IndexState, DeltaCount, DeltaBytes, RecoveryTerms,
+ PersistentClient, TransientClient, VHost).
+
+%% Extract the persistent msg_store client ref from recovery terms.
+%% After an unclean shutdown, or when no ref was saved, generate a
+%% fresh ref (and in the latter case discard the terms).
+process_recovery_terms(non_clean_shutdown = Terms) ->
+    {rabbit_guid:gen(), Terms};
+process_recovery_terms(Terms) ->
+    case proplists:get_value(persistent_ref, Terms) of
+        undefined -> {rabbit_guid:gen(), []};
+        PRef      -> {PRef, Terms}
+    end.
+
+%% Clean shutdown: flush all pending acks to disk, terminate the
+%% persistent store client (keeping its ref so saved state can be
+%% trusted on restart), delete the transient client, and hand the
+%% persistent counts to the queue index as recovery terms.
+terminate(_Reason, State) ->
+ State1 = #vqstate { virtual_host = VHost,
+ persistent_count = PCount,
+ persistent_bytes = PBytes,
+ index_state = IndexState,
+ msg_store_clients = {MSCStateP, MSCStateT} } =
+ purge_pending_ack(true, State),
+ %% Keep the persistent client's ref; it is written into the
+ %% recovery terms below.
+ PRef = case MSCStateP of
+ undefined -> undefined;
+ _ -> ok = maybe_client_terminate(MSCStateP),
+ rabbit_msg_store:client_ref(MSCStateP)
+ end,
+ %% The transient store's contents are never recovered, so its
+ %% client is deleted outright.
+ ok = rabbit_msg_store:client_delete_and_terminate(MSCStateT),
+ Terms = [{persistent_ref, PRef},
+ {persistent_count, PCount},
+ {persistent_bytes, PBytes}],
+ a(State1#vqstate {
+ index_state = rabbit_queue_index:terminate(VHost, Terms, IndexState),
+ msg_store_clients = undefined }).
+
+%% the only difference between purge and delete is that delete also
+%% needs to delete everything that's been delivered and not ack'd.
+delete_and_terminate(_Reason, State) ->
+ %% Normally when we purge messages we interact with the qi by
+ %% issuing delivers and acks for every purged message. In this case
+ %% we don't need to do that, so we just delete the qi.
+ State1 = purge_and_index_reset(State),
+ State2 = #vqstate { msg_store_clients = {MSCStateP, MSCStateT} } =
+ purge_pending_ack_delete_and_terminate(State1),
+ %% Both store clients are deleted (not merely terminated): this
+ %% queue's messages will never be needed again.
+ case MSCStateP of
+ undefined -> ok;
+ _ -> rabbit_msg_store:client_delete_and_terminate(MSCStateP)
+ end,
+ rabbit_msg_store:client_delete_and_terminate(MSCStateT),
+ a(State2 #vqstate { msg_store_clients = undefined }).
+
+%% Erase the on-disk queue index of a queue that terminated abnormally;
+%% only the index files need cleaning up here.
+delete_crashed(Q) when ?is_amqqueue(Q) ->
+ QName = amqqueue:get_name(Q),
+ ok = rabbit_queue_index:erase(QName).
+
+%% Remove every message pending delivery, returning how many were
+%% purged. When nothing is awaiting an ack or a confirm the index can
+%% simply be reset; otherwise messages are removed individually so the
+%% pending-ack bookkeeping stays intact.
+purge(State = #vqstate { len = Len }) ->
+    NothingPending =
+        is_pending_ack_empty(State) and is_unconfirmed_empty(State),
+    PurgedState = case NothingPending of
+                      true  -> purge_and_index_reset(State);
+                      false -> purge_when_pending_acks(State)
+                  end,
+    {Len, PurgedState}.
+
+%% Drop all pending acks without flushing them to disk (the 'false'
+%% argument), leaving messages pending delivery untouched.
+purge_acks(State) -> a(purge_pending_ack(false, State)).
+
+%% Enqueue one message, then refresh rates and, if needed, page
+%% messages out to keep RAM use within target.
+publish(Msg, MsgProps, IsDelivered, ChPid, Flow, State) ->
+    Published = publish1(Msg, MsgProps, IsDelivered, ChPid, Flow,
+                         fun maybe_write_to_disk/4, State),
+    a(maybe_reduce_memory_use(maybe_update_rates(Published))).
+
+%% Enqueue a batch of messages from one channel; index writes are
+%% confirmed once at the end (ui/1) rather than per message.
+batch_publish(Publishes, ChPid, Flow, State) ->
+    {ChPid, Flow, StateN} =
+        lists:foldl(fun batch_publish1/2, {ChPid, Flow, State}, Publishes),
+    a(maybe_reduce_memory_use(maybe_update_rates(ui(StateN)))).
+
+%% Publish a message that is considered already delivered; returns the
+%% seq id, which callers use as the ack tag.
+publish_delivered(Msg, MsgProps, ChPid, Flow, State) ->
+    {SeqId, StateN} =
+        publish_delivered1(Msg, MsgProps, ChPid, Flow,
+                           fun maybe_write_to_disk/4, State),
+    {SeqId, a(maybe_reduce_memory_use(maybe_update_rates(StateN)))}.
+
+%% Batch variant of publish_delivered/5. The fold accumulates the seq
+%% ids in reverse, so they are reversed back into publish order.
+batch_publish_delivered(Publishes, ChPid, Flow, State) ->
+    {ChPid, Flow, RevSeqIds, StateN} =
+        lists:foldl(fun batch_publish_delivered1/2,
+                    {ChPid, Flow, [], State}, Publishes),
+    StateConfirmed = ui(StateN),
+    {lists:reverse(RevSeqIds),
+     a(maybe_reduce_memory_use(maybe_update_rates(StateConfirmed)))}.
+
+%% rabbit_backing_queue callback; this implementation has no work to do
+%% for discarded messages.
+discard(_MsgId, _ChPid, _Flow, State) -> State.
+
+%% Return and clear the set of message ids whose publisher confirms
+%% are due.
+drain_confirmed(State = #vqstate { confirmed = Confirmed }) ->
+    case gb_sets:is_empty(Confirmed) of
+        true ->
+            %% common case: nothing to confirm, avoid allocating a new set
+            {[], State};
+        false ->
+            {gb_sets:to_list(Confirmed),
+             State #vqstate { confirmed = gb_sets:new() }}
+    end.
+
+%% Drop messages from the head of the queue for as long as
+%% Pred(MsgProps) holds. Returns whatever MsgProps
+%% remove_by_predicate/2 produces for the first non-matching message
+%% (NOTE(review): presumably 'undefined' when the queue empties —
+%% confirm in remove_by_predicate/2).
+dropwhile(Pred, State) ->
+ {MsgProps, State1} =
+ remove_by_predicate(Pred, State),
+ {MsgProps, a(State1)}.
+
+%% Like dropwhile/2, but each removed message is folded through Fun
+%% with accumulator Acc instead of being discarded. Returns the head
+%% MsgProps from fetch_by_predicate/4, the final accumulator, and the
+%% new state.
+fetchwhile(Pred, Fun, Acc, State) ->
+ {MsgProps, Acc1, State1} =
+ fetch_by_predicate(Pred, Fun, Acc, State),
+ {MsgProps, Acc1, a(State1)}.
+
+%% Remove and return the head message, loading its payload from disk
+%% first if it is not already in RAM.
+fetch(AckRequired, State) ->
+    case queue_out(State) of
+        {empty, StateN} ->
+            {empty, a(StateN)};
+        {{value, MsgStatus = #msg_status { is_delivered = IsDelivered }},
+         StateN} ->
+            %% the message may not have been read from disk yet; do so now
+            {Msg, StateRead} = read_msg(MsgStatus, StateN),
+            {AckTag, StateOut} = remove(AckRequired, MsgStatus, StateRead),
+            {{Msg, IsDelivered, AckTag}, a(StateOut)}
+    end.
+
+%% Remove the head message without reading its payload; returns its
+%% msg id and an ack tag.
+drop(AckRequired, State) ->
+    case queue_out(State) of
+        {empty, StateN} ->
+            {empty, a(StateN)};
+        {{value, MsgStatus = #msg_status { msg_id = MsgId }}, StateN} ->
+            {AckTag, StateOut} = remove(AckRequired, MsgStatus, StateN),
+            {{MsgId, AckTag}, a(StateOut)}
+    end.
+
+%% Acknowledge the given pending acks: remove them from the
+%% pending-ack state, ack any on-disk index entries, and remove any
+%% messages held in the store. Returns the msg ids that were acked.
+%% Duplicated from rabbit_backing_queue
+-spec ack([ack()], state()) -> {[rabbit_guid:guid()], state()}.
+
+ack([], State) ->
+ {[], State};
+%% optimisation: this head is essentially a partial evaluation of the
+%% general case below, for the single-ack case.
+ack([SeqId], State) ->
+ case remove_pending_ack(true, SeqId, State) of
+ {none, _} ->
+ {[], State};
+ {#msg_status { msg_id = MsgId,
+ is_persistent = IsPersistent,
+ msg_in_store = MsgInStore,
+ index_on_disk = IndexOnDisk },
+ State1 = #vqstate { index_state = IndexState,
+ msg_store_clients = MSCState,
+ ack_out_counter = AckOutCount }} ->
+ IndexState1 = case IndexOnDisk of
+ true -> rabbit_queue_index:ack([SeqId], IndexState);
+ false -> IndexState
+ end,
+ case MsgInStore of
+ true -> ok = msg_store_remove(MSCState, IsPersistent, [MsgId]);
+ false -> ok
+ end,
+ {[MsgId],
+ a(State1 #vqstate { index_state = IndexState1,
+ ack_out_counter = AckOutCount + 1 })}
+ end;
+%% General case: accumulate index seq ids and store msg ids across all
+%% acks, then issue one index ack and one store removal per store.
+ack(AckTags, State) ->
+ {{IndexOnDiskSeqIds, MsgIdsByStore, AllMsgIds},
+ State1 = #vqstate { index_state = IndexState,
+ msg_store_clients = MSCState,
+ ack_out_counter = AckOutCount }} =
+ lists:foldl(
+ fun (SeqId, {Acc, State2}) ->
+ case remove_pending_ack(true, SeqId, State2) of
+ {none, _} ->
+ %% unknown seq id: ignore (e.g. already acked)
+ {Acc, State2};
+ {MsgStatus, State3} ->
+ {accumulate_ack(MsgStatus, Acc), State3}
+ end
+ end, {accumulate_ack_init(), State}, AckTags),
+ IndexState1 = rabbit_queue_index:ack(IndexOnDiskSeqIds, IndexState),
+ remove_msgs_by_id(MsgIdsByStore, MSCState),
+ {lists:reverse(AllMsgIds),
+ a(State1 #vqstate { index_state = IndexState1,
+ ack_out_counter = AckOutCount + length(AckTags) })}.
+
+%% Return pending-ack messages to the queue, preserving their original
+%% seq ids. Each ack tag is merged into the right-most structure its
+%% seq id allows: q4, then q3, then delta (default mode); lazy mode has
+%% no q4 so merging starts at q3.
+requeue(AckTags, #vqstate { mode = default,
+ delta = Delta,
+ q3 = Q3,
+ q4 = Q4,
+ in_counter = InCounter,
+ len = Len } = State) ->
+ %% Tags below beta_limit(Q3) belong in q4; the remainder cascade
+ %% into q3 and finally delta.
+ {SeqIds, Q4a, MsgIds, State1} = queue_merge(lists:sort(AckTags), Q4, [],
+ beta_limit(Q3),
+ fun publish_alpha/2, State),
+ {SeqIds1, Q3a, MsgIds1, State2} = queue_merge(SeqIds, Q3, MsgIds,
+ delta_limit(Delta),
+ fun publish_beta/2, State1),
+ {Delta1, MsgIds2, State3} = delta_merge(SeqIds1, Delta, MsgIds1,
+ State2),
+ MsgCount = length(MsgIds2),
+ {MsgIds2, a(maybe_reduce_memory_use(
+ maybe_update_rates(ui(
+ State3 #vqstate { delta = Delta1,
+ q3 = Q3a,
+ q4 = Q4a,
+ in_counter = InCounter + MsgCount,
+ len = Len + MsgCount }))))};
+requeue(AckTags, #vqstate { mode = lazy,
+ delta = Delta,
+ q3 = Q3,
+ in_counter = InCounter,
+ len = Len } = State) ->
+ {SeqIds, Q3a, MsgIds, State1} = queue_merge(lists:sort(AckTags), Q3, [],
+ delta_limit(Delta),
+ fun publish_beta/2, State),
+ {Delta1, MsgIds1, State2} = delta_merge(SeqIds, Delta, MsgIds,
+ State1),
+ MsgCount = length(MsgIds1),
+ {MsgIds1, a(maybe_reduce_memory_use(
+ maybe_update_rates(ui(
+ State2 #vqstate { delta = Delta1,
+ q3 = Q3a,
+ in_counter = InCounter + MsgCount,
+ len = Len + MsgCount }))))}.
+
+%% Fold MsgFun over the messages behind the given ack tags, reading
+%% each message in from disk as necessary. The acks themselves remain
+%% pending.
+ackfold(MsgFun, Acc, State, AckTags) ->
+    Step = fun (SeqId, {AccIn, StateIn}) ->
+                   MsgStatus = lookup_pending_ack(SeqId, StateIn),
+                   {Msg, StateRead} = read_msg(MsgStatus, StateIn),
+                   {MsgFun(Msg, SeqId, AccIn), StateRead}
+           end,
+    {AccOut, StateOut} = lists:foldl(Step, {Acc, State}, AckTags),
+    {AccOut, a(StateOut)}.
+
+%% Fold Fun over every message: queue contents plus all three classes
+%% of pending acks, each exposed as an iterator and merged by ifold/4.
+fold(Fun, Acc, State = #vqstate{index_state = IndexState}) ->
+ {Its, IndexState1} = lists:foldl(fun inext/2, {[], IndexState},
+ [msg_iterator(State),
+ disk_ack_iterator(State),
+ ram_ack_iterator(State),
+ qi_ack_iterator(State)]),
+ ifold(Fun, Acc, Its, State#vqstate{index_state = IndexState1}).
+
+%% Number of messages pending delivery (excludes unacked messages).
+len(#vqstate { len = Len }) -> Len.
+
+%% True when no messages are pending delivery (ignores unacked
+%% messages; see depth/1).
+is_empty(State) -> len(State) == 0.
+
+%% Messages pending delivery plus messages pending acknowledgement.
+depth(State) ->
+    count_pending_acks(State) + len(State).
+
+%% Convert the target RAM duration (seconds, or 'infinity') into a
+%% target count of messages to hold in RAM, using the combined
+%% ingress/egress message and ack rates. If the target shrank, page
+%% messages out immediately; if it grew, do nothing — RAM refills only
+%% as the queue is read (see module comment).
+set_ram_duration_target(
+ DurationTarget, State = #vqstate {
+ rates = #rates { in = AvgIngressRate,
+ out = AvgEgressRate,
+ ack_in = AvgAckIngressRate,
+ ack_out = AvgAckEgressRate },
+ target_ram_count = TargetRamCount }) ->
+ Rate =
+ AvgEgressRate + AvgIngressRate + AvgAckEgressRate + AvgAckIngressRate,
+ TargetRamCount1 =
+ case DurationTarget of
+ infinity -> infinity;
+ _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec
+ end,
+ State1 = State #vqstate { target_ram_count = TargetRamCount1 },
+ a(case TargetRamCount1 == infinity orelse
+ (TargetRamCount =/= infinity andalso
+ TargetRamCount1 >= TargetRamCount) of
+ true -> State1;
+ false -> reduce_memory_use(State1)
+ end).
+
+%% Recompute the rate averages once enough messages have flowed
+%% (in + out counters exceed ?MSGS_PER_RATE_CALC); otherwise no-op.
+maybe_update_rates(State = #vqstate{ in_counter  = InCount,
+                                     out_counter = OutCount })
+  when InCount + OutCount > ?MSGS_PER_RATE_CALC ->
+    update_rates(State);
+maybe_update_rates(State) ->
+    State.
+
+%% Fold the four event counters into exponentially-smoothed rates
+%% (msgs/sec) and reset the counters. Uses monotonic time so the
+%% interval is immune to wall-clock adjustments.
+update_rates(State = #vqstate{ in_counter      =     InCount,
+                               out_counter     =    OutCount,
+                               ack_in_counter  =  AckInCount,
+                               ack_out_counter = AckOutCount,
+                               rates = #rates{ in        =     InRate,
+                                               out       =    OutRate,
+                                               ack_in    =  AckInRate,
+                                               ack_out   = AckOutRate,
+                                               timestamp = TS }}) ->
+    Now = erlang:monotonic_time(),
+
+    Rates = #rates { in        = update_rate(Now, TS,     InCount,     InRate),
+                     out       = update_rate(Now, TS,    OutCount,    OutRate),
+                     ack_in    = update_rate(Now, TS,  AckInCount,  AckInRate),
+                     ack_out   = update_rate(Now, TS, AckOutCount, AckOutRate),
+                     timestamp = Now },
+
+    State#vqstate{ in_counter      = 0,
+                   out_counter     = 0,
+                   ack_in_counter  = 0,
+                   ack_out_counter = 0,
+                   rates           = Rates }.
+
+%% Blend Count/Time (events per second over the elapsed interval) into
+%% the previous Rate with a moving average of half-life
+%% ?RATE_AVG_HALF_LIFE. If no measurable time has passed, keep the old
+%% rate rather than divide by zero.
+update_rate(Now, TS, Count, Rate) ->
+    Time = erlang:convert_time_unit(Now - TS, native, micro_seconds) /
+        ?MICROS_PER_SECOND,
+    if
+        Time == 0 -> Rate;
+        true      -> rabbit_misc:moving_average(Time, ?RATE_AVG_HALF_LIFE,
+                                                Count / Time, Rate)
+    end.
+
+%% Estimate how many seconds of traffic the messages currently held in
+%% RAM represent: (current + previous RAM msg and ack counts) divided
+%% by four times the summed rates (i.e. averaged over the two samples
+%% and the four rate streams). Returns infinity when all rates are
+%% negligible (< 0.01/sec). Also refreshes the rates as a side effect.
+ram_duration(State) ->
+    State1 = #vqstate { rates = #rates { in      = AvgIngressRate,
+                                         out     = AvgEgressRate,
+                                         ack_in  = AvgAckIngressRate,
+                                         ack_out = AvgAckEgressRate },
+                        ram_msg_count      = RamMsgCount,
+                        ram_msg_count_prev = RamMsgCountPrev,
+                        ram_pending_ack    = RPA,
+                        qi_pending_ack     = QPA,
+                        ram_ack_count_prev = RamAckCountPrev } =
+        update_rates(State),
+
+    RamAckCount = gb_trees:size(RPA) + gb_trees:size(QPA),
+
+    Duration = %% msgs+acks / (msgs+acks/sec) == sec
+        case lists:all(fun (X) -> X < 0.01 end,
+                       [AvgEgressRate, AvgIngressRate,
+                        AvgAckEgressRate, AvgAckIngressRate]) of
+            true  -> infinity;
+            false -> (RamMsgCountPrev + RamMsgCount +
+                          RamAckCount + RamAckCountPrev) /
+                         (4 * (AvgEgressRate + AvgIngressRate +
+                                   AvgAckEgressRate + AvgAckIngressRate))
+        end,
+
+    {Duration, State1}.
+
+%% Map the queue index's sync needs onto the backing-queue timeout
+%% protocol: 'timed' when confirms are outstanding, 'idle' for other
+%% pending syncs, 'false' when nothing needs flushing.
+needs_timeout(#vqstate { index_state = IndexState }) ->
+    case rabbit_queue_index:needs_sync(IndexState) of
+        confirms -> timed;
+        other    -> idle;
+        false    -> false
+    end.
+
+%% Timeout callback: sync the queue index to disk.
+timeout(State = #vqstate { index_state = IndexState }) ->
+    State #vqstate { index_state = rabbit_queue_index:sync(IndexState) }.
+
+%% Flush the queue index before the owning process hibernates.
+handle_pre_hibernate(State = #vqstate { index_state = IndexState }) ->
+    State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }.
+
+%% Consume the bump_reduce_memory_use message: clear the waiting flag
+%% if set, otherwise ignore it.
+handle_info(bump_reduce_memory_use, State = #vqstate{ waiting_bump = true }) ->
+    State#vqstate{ waiting_bump = false };
+handle_info(bump_reduce_memory_use, State) ->
+    State.
+
+%% Resume paging work (e.g. after credit_flow unblocks).
+resume(State) -> a(reduce_memory_use(State)).
+
+%% Current smoothed {ingress, egress} message rates.
+msg_rates(#vqstate { rates = #rates { in  = AvgIngressRate,
+                                      out = AvgEgressRate } }) ->
+    {AvgIngressRate, AvgEgressRate}.
+
+%% Report a single named statistic about the queue state. Unknown
+%% items yield the empty atom '' rather than an error, so callers can
+%% probe freely.
+info(messages_ready_ram, #vqstate{ram_msg_count = RamMsgCount}) ->
+    RamMsgCount;
+info(messages_unacknowledged_ram, #vqstate{ram_pending_ack = RPA,
+                                           qi_pending_ack  = QPA}) ->
+    gb_trees:size(RPA) + gb_trees:size(QPA);
+info(messages_ram, State) ->
+    info(messages_ready_ram, State) + info(messages_unacknowledged_ram, State);
+info(messages_persistent, #vqstate{persistent_count = PersistentCount}) ->
+    PersistentCount;
+info(messages_paged_out, #vqstate{delta = #delta{transient = Count}}) ->
+    Count;
+info(message_bytes, #vqstate{bytes         = Bytes,
+                             unacked_bytes = UBytes}) ->
+    Bytes + UBytes;
+info(message_bytes_ready, #vqstate{bytes = Bytes}) ->
+    Bytes;
+info(message_bytes_unacknowledged, #vqstate{unacked_bytes = UBytes}) ->
+    UBytes;
+info(message_bytes_ram, #vqstate{ram_bytes = RamBytes}) ->
+    RamBytes;
+info(message_bytes_persistent, #vqstate{persistent_bytes = PersistentBytes}) ->
+    PersistentBytes;
+info(message_bytes_paged_out, #vqstate{delta_transient_bytes = PagedOutBytes}) ->
+    PagedOutBytes;
+info(head_message_timestamp, #vqstate{
+          q3               = Q3,
+          q4               = Q4,
+          ram_pending_ack  = RPA,
+          qi_pending_ack   = QPA}) ->
+          head_message_timestamp(Q3, Q4, RPA, QPA);
+info(disk_reads, #vqstate{disk_read_count = Count}) ->
+    Count;
+info(disk_writes, #vqstate{disk_write_count = Count}) ->
+    Count;
+info(backing_queue_status, #vqstate {
+          q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4,
+          mode             = Mode,
+          len              = Len,
+          target_ram_count = TargetRamCount,
+          next_seq_id      = NextSeqId,
+          rates            = #rates { in      = AvgIngressRate,
+                                      out     = AvgEgressRate,
+                                      ack_in  = AvgAckIngressRate,
+                                      ack_out = AvgAckEgressRate }}) ->
+
+    [ {mode                , Mode},
+      {q1                  , ?QUEUE:len(Q1)},
+      {q2                  , ?QUEUE:len(Q2)},
+      {delta               , Delta},
+      {q3                  , ?QUEUE:len(Q3)},
+      {q4                  , ?QUEUE:len(Q4)},
+      {len                 , Len},
+      {target_ram_count    , TargetRamCount},
+      {next_seq_id         , NextSeqId},
+      {avg_ingress_rate    , AvgIngressRate},
+      {avg_egress_rate     , AvgEgressRate},
+      {avg_ack_ingress_rate, AvgAckIngressRate},
+      {avg_ack_egress_rate , AvgAckEgressRate} ];
+info(_, _) ->
+    ''.
+
+%% Run Fun against the state only when it was addressed to this
+%% backing-queue module; otherwise return the state untouched.
+invoke(?MODULE, Fun, State) -> Fun(?MODULE, State);
+invoke(      _,   _, State) -> State.
+
+%% This backing queue performs no duplicate detection.
+is_duplicate(_Msg, State) -> {false, State}.
+
+%% Switch between 'default' and 'lazy' queue modes. A no-op when the
+%% mode is unchanged; unknown modes are ignored.
+set_queue_mode(Mode, State = #vqstate { mode = Mode }) ->
+    State;
+set_queue_mode(lazy, State = #vqstate {
+                                target_ram_count = TargetRamCount }) ->
+    %% To become a lazy queue we need to page everything to disk first.
+    State1 = convert_to_lazy(State),
+    %% restore the original target_ram_count
+    a(State1 #vqstate { mode = lazy, target_ram_count = TargetRamCount });
+set_queue_mode(default, State) ->
+    %% becoming a default queue means loading messages from disk like
+    %% when a queue is recovered.
+    a(maybe_deltas_to_betas(State #vqstate { mode = default }));
+set_queue_mode(_, State) ->
+    State.
+
+%% Pair each published message with its ack tag, prepending
+%% {MsgId, AckTag} tuples onto Accumulator. Msgs and AckTags must be
+%% equal-length, positionally matched lists (lists:zip/2 enforces it).
+zip_msgs_and_acks(Msgs, AckTags, Accumulator, _State) ->
+    lists:foldl(fun ({{#basic_message{ id = Id }, _Props}, AckTag}, Acc) ->
+                        [{Id, AckTag} | Acc]
+                end, Accumulator, lists:zip(Msgs, AckTags)).
+
+%% Page every message to disk (by setting a zero RAM duration target)
+%% in preparation for becoming a lazy queue, looping until delta plus
+%% q3 accounts for the whole queue length. Recurses with resume/1
+%% after waiting for msg_store credit when paging was flow-blocked.
+convert_to_lazy(State) ->
+    State1 = #vqstate { delta = Delta, q3 = Q3, len = Len } =
+        set_ram_duration_target(0, State),
+    case Delta#delta.count + ?QUEUE:len(Q3) == Len of
+        true ->
+            State1;
+        false ->
+            %% When pushing messages to disk, we might have been
+            %% blocked by the msg_store, so we need to see if we have
+            %% to wait for more credit, and then keep paging messages.
+            %%
+            %% The amqqueue_process could have taken care of this, but
+            %% between the time it receives the bump_credit msg and
+            %% calls BQ:resume to keep paging messages to disk, some
+            %% other request may arrive to the BQ which at this moment
+            %% is not in a proper state for a lazy BQ (unless all
+            %% messages have been paged to disk already).
+            wait_for_msg_store_credit(),
+            convert_to_lazy(resume(State1))
+    end.
+
+%% If credit_flow reports us blocked, block in receive until the next
+%% bump_credit message arrives and feed it back to credit_flow.
+%% NB: selective receive with no 'after' clause - relies on the
+%% msg_store eventually granting credit.
+wait_for_msg_store_credit() ->
+    case credit_flow:blocked() of
+        true  -> receive
+                     {bump_credit, Msg} ->
+                         credit_flow:handle_bump_msg(Msg)
+                 end;
+        false -> ok
+    end.
+
+%% Get the Timestamp property of the first msg, if present. This is
+%% the one with the oldest timestamp among the heads of the pending
+%% acks and unread queues. We can't check disk_pending_acks as these
+%% are paged out - we assume some will soon be paged in rather than
+%% forcing it to happen. Pending ack msgs are included as they are
+%% regarded as unprocessed until acked, this also prevents the result
+%% apparently oscillating during repeated rejects. Q3 is only checked
+%% when Q4 is empty as any Q4 msg will be earlier. Returns '' when no
+%% candidate message carries a timestamp.
+head_message_timestamp(Q3, Q4, RPA, QPA) ->
+    HeadMsgs = [ HeadMsgStatus#msg_status.msg ||
+                   HeadMsgStatus <-
+                       [ get_qs_head([Q4, Q3]),
+                         get_pa_head(RPA),
+                         get_pa_head(QPA) ],
+                   HeadMsgStatus /= undefined,
+                   HeadMsgStatus#msg_status.msg /= undefined ],
+
+    Timestamps =
+        [Timestamp || HeadMsg <- HeadMsgs,
+                      Timestamp <- [rabbit_basic:extract_timestamp(
+                                      HeadMsg#basic_message.content)],
+                      Timestamp /= undefined
+        ],
+
+    case Timestamps == [] of
+        true  -> '';
+        false -> lists:min(Timestamps)
+    end.
+
+%% Head of the first non-empty queue in Qs, or undefined. Uses
+%% throw/catch purely as an early exit from the fold.
+get_qs_head(Qs) ->
+    catch lists:foldl(
+            fun (Q, Acc) ->
+                    case get_q_head(Q) of
+                        undefined -> Acc;
+                        Val       -> throw(Val)
+                    end
+            end, undefined, Qs).
+
+%% Head msg_status of a ?QUEUE, or undefined if empty.
+get_q_head(Q) ->
+    get_collection_head(Q, fun ?QUEUE:is_empty/1, fun ?QUEUE:peek/1).
+
+%% Smallest-key msg_status of a pending-ack gb_tree, or undefined.
+get_pa_head(PA) ->
+    get_collection_head(PA, fun gb_trees:is_empty/1, fun gb_trees:smallest/1).
+
+%% Generic head accessor: GetVal must return a {_, MsgStatus} pair.
+get_collection_head(Col, IsEmpty, GetVal) ->
+    case IsEmpty(Col) of
+        false ->
+            {_, MsgStatus} = GetVal(Col),
+            MsgStatus;
+        true  -> undefined
+    end.
+
+%%----------------------------------------------------------------------------
+%% Minor helpers
+%%----------------------------------------------------------------------------
+%% Invariant assertion helper: checks the structural invariants of the
+%% queue state for the current mode ('default' or 'lazy') via pattern
+%% matches against 'true', then returns the state unchanged. Any
+%% violated invariant crashes with badmatch, which is intentional.
+a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4,
+                     mode             = default,
+                     len              = Len,
+                     bytes            = Bytes,
+                     unacked_bytes    = UnackedBytes,
+                     persistent_count = PersistentCount,
+                     persistent_bytes = PersistentBytes,
+                     ram_msg_count    = RamMsgCount,
+                     ram_bytes        = RamBytes}) ->
+    E1 = ?QUEUE:is_empty(Q1),
+    E2 = ?QUEUE:is_empty(Q2),
+    ED = Delta#delta.count == 0,
+    E3 = ?QUEUE:is_empty(Q3),
+    E4 = ?QUEUE:is_empty(Q4),
+    LZ = Len == 0,
+
+    %% if q1 has messages then q3 cannot be empty. See publish/6.
+    true = E1 or not E3,
+    %% if q2 has messages then we have messages in delta (paged to
+    %% disk). See push_alphas_to_betas/2.
+    true = E2 or not ED,
+    %% if delta has messages then q3 cannot be empty. This is enforced
+    %% by paging, where min([?SEGMENT_ENTRY_COUNT, len(q3)]) messages
+    %% are always kept on RAM.
+    true = ED or not E3,
+    %% if the queue length is 0, then q3 and q4 must be empty.
+    true = LZ == (E3 and E4),
+
+    true = Len             >= 0,
+    true = Bytes           >= 0,
+    true = UnackedBytes    >= 0,
+    true = PersistentCount >= 0,
+    true = PersistentBytes >= 0,
+    true = RamMsgCount     >= 0,
+    true = RamMsgCount     =< Len,
+    true = RamBytes        >= 0,
+    true = RamBytes        =< Bytes + UnackedBytes,
+
+    State;
+a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4,
+                     mode             = lazy,
+                     len              = Len,
+                     bytes            = Bytes,
+                     unacked_bytes    = UnackedBytes,
+                     persistent_count = PersistentCount,
+                     persistent_bytes = PersistentBytes,
+                     ram_msg_count    = RamMsgCount,
+                     ram_bytes        = RamBytes}) ->
+    E1 = ?QUEUE:is_empty(Q1),
+    E2 = ?QUEUE:is_empty(Q2),
+    ED = Delta#delta.count == 0,
+    E3 = ?QUEUE:is_empty(Q3),
+    E4 = ?QUEUE:is_empty(Q4),
+    LZ = Len == 0,
+    L3 = ?QUEUE:len(Q3),
+
+    %% q1 must always be empty, since q1 only gets messages during
+    %% publish, but for lazy queues messages go straight to delta.
+    true = E1,
+
+    %% q2 only gets messages from q1 when push_alphas_to_betas is
+    %% called for a non empty delta, which won't be the case for a
+    %% lazy queue. This means q2 must always be empty.
+    true = E2,
+
+    %% q4 must always be empty, since q1 only gets messages during
+    %% publish, but for lazy queues messages go straight to delta.
+    true = E4,
+
+    %% if the queue is empty, then delta is empty and q3 is empty.
+    true = LZ == (ED and E3),
+
+    %% There should be no messages in q1, q2, and q4
+    true = Delta#delta.count + L3 == Len,
+
+    true = Len             >= 0,
+    true = Bytes           >= 0,
+    true = UnackedBytes    >= 0,
+    true = PersistentCount >= 0,
+    true = PersistentBytes >= 0,
+    true = RamMsgCount     >= 0,
+    true = RamMsgCount     =< Len,
+    true = RamBytes        >= 0,
+    true = RamBytes        =< Bytes + UnackedBytes,
+
+    State.
+
+%% Delta assertion helper: a delta's span must at least cover its
+%% count (Start + Count =< End); crashes with function_clause if not.
+d(Delta = #delta { start_seq_id = Start, count = Count, end_seq_id = End })
+  when Start + Count =< End ->
+    Delta.
+
+%% Msg-status assertion helper: a persistent message must have its
+%% index on disk, and the message body must exist in RAM or the store.
+m(MsgStatus = #msg_status { is_persistent = IsPersistent,
+                            msg_in_store  = MsgInStore,
+                            index_on_disk = IndexOnDisk }) ->
+    true = (not IsPersistent) or IndexOnDisk,
+    true = msg_in_ram(MsgStatus) or MsgInStore,
+    MsgStatus.
+
+%% Boolean -> 1/0, used for counter arithmetic.
+one_if(true ) -> 1;
+one_if(false) -> 0.
+
+%% Conditionally prepend E to L.
+cons_if(true,   E, L) -> [E | L];
+cons_if(false, _E, L) -> L.
+
+%% Conditionally add Val to a gb_set.
+gb_sets_maybe_insert(false, _Val, Set) -> Set;
+gb_sets_maybe_insert(true,   Val, Set) -> gb_sets:add(Val, Set).
+
+%% Build the msg_status record for a freshly published message: not
+%% yet written anywhere, with its persist_to destination (queue index
+%% vs msg store) decided up-front from the message size vs
+%% IndexMaxSize.
+msg_status(IsPersistent, IsDelivered, SeqId,
+           Msg = #basic_message {id = MsgId}, MsgProps, IndexMaxSize) ->
+    #msg_status{seq_id        = SeqId,
+                msg_id        = MsgId,
+                msg           = Msg,
+                is_persistent = IsPersistent,
+                is_delivered  = IsDelivered,
+                msg_in_store  = false,
+                index_on_disk = false,
+                persist_to    = determine_persist_to(Msg, MsgProps, IndexMaxSize),
+                msg_props     = MsgProps}.
+
+%% Build a msg_status for a message read back from the queue index.
+%% First clause: the full message was embedded in the index (body in
+%% RAM, nothing in the msg store). Second clause: only the id was
+%% indexed, so the body lives in the msg store.
+beta_msg_status({Msg = #basic_message{id = MsgId},
+                 SeqId, MsgProps, IsPersistent, IsDelivered}) ->
+  MS0 = beta_msg_status0(SeqId, MsgProps, IsPersistent, IsDelivered),
+  MS0#msg_status{msg_id       = MsgId,
+                 msg          = Msg,
+                 persist_to   = queue_index,
+                 msg_in_store = false};
+
+beta_msg_status({MsgId, SeqId, MsgProps, IsPersistent, IsDelivered}) ->
+  MS0 = beta_msg_status0(SeqId, MsgProps, IsPersistent, IsDelivered),
+  MS0#msg_status{msg_id       = MsgId,
+                 msg          = undefined,
+                 persist_to   = msg_store,
+                 msg_in_store = true}.
+
+%% Shared fields for both beta_msg_status/1 clauses; index_on_disk is
+%% true by construction since the entry came from the queue index.
+beta_msg_status0(SeqId, MsgProps, IsPersistent, IsDelivered) ->
+  #msg_status{seq_id        = SeqId,
+              msg           = undefined,
+              is_persistent = IsPersistent,
+              is_delivered  = IsDelivered,
+              index_on_disk = true,
+              msg_props     = MsgProps}.
+
+%% Drop the in-RAM body when the message is (to be) persisted in the
+%% msg store; index-embedded messages keep their body.
+trim_msg_status(MsgStatus) ->
+    case persist_to(MsgStatus) of
+        msg_store   -> MsgStatus#msg_status{msg = undefined};
+        queue_index -> MsgStatus
+    end.
+
+%% Apply Fun to the persistent (true) or transient (false) msg-store
+%% client state and thread the updated client back into the pair.
+with_msg_store_state({MSCStateP, MSCStateT}, true, Fun) ->
+    {Result, MSCStateP1} = Fun(MSCStateP),
+    {Result, {MSCStateP1, MSCStateT}};
+with_msg_store_state({MSCStateP, MSCStateT}, false, Fun) ->
+    {Result, MSCStateT1} = Fun(MSCStateT),
+    {Result, {MSCStateP, MSCStateT1}}.
+
+%% As above but for Funs that do not change the client state; asserts
+%% (via the match on MSCState) that the state really is unchanged.
+with_immutable_msg_store_state(MSCState, IsPersistent, Fun) ->
+    {Res, MSCState} = with_msg_store_state(MSCState, IsPersistent,
+                                           fun (MSCState1) ->
+                                                   {Fun(MSCState1), MSCState1}
+                                           end),
+    Res.
+
+%% Initialise a msg-store client with a freshly generated ref.
+msg_store_client_init(MsgStore, MsgOnDiskFun, Callback, VHost) ->
+    msg_store_client_init(MsgStore, rabbit_guid:gen(), MsgOnDiskFun,
+                          Callback, VHost).
+
+%% Initialise a msg-store client with an explicit ref, wiring in a
+%% close-fds callback appropriate to whether this is the persistent
+%% store.
+msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback, VHost) ->
+    CloseFDsFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE),
+    rabbit_vhost_msg_store:client_init(VHost, MsgStore,
+                                       Ref, MsgOnDiskFun,
+                                       fun () ->
+                                               Callback(?MODULE, CloseFDsFun)
+                                       end).
+
+%% Write a message body to the appropriate (persistent/transient)
+%% store, subject to credit flow.
+msg_store_write(MSCState, IsPersistent, MsgId, Msg) ->
+    with_immutable_msg_store_state(
+      MSCState, IsPersistent,
+      fun (MSCState1) ->
+              rabbit_msg_store:write_flow(MsgId, Msg, MSCState1)
+      end).
+
+%% Read a message body back from the appropriate store.
+msg_store_read(MSCState, IsPersistent, MsgId) ->
+    with_msg_store_state(
+      MSCState, IsPersistent,
+      fun (MSCState1) ->
+              rabbit_msg_store:read(MsgId, MSCState1)
+      end).
+
+%% Remove a batch of message ids from the appropriate store.
+msg_store_remove(MSCState, IsPersistent, MsgIds) ->
+    with_immutable_msg_store_state(
+      MSCState, IsPersistent,
+      fun (MCSState1) ->
+              rabbit_msg_store:remove(MsgIds, MCSState1)
+      end).
+
+%% Ask the appropriate store client to close file descriptors it has
+%% been told to release.
+msg_store_close_fds(MSCState, IsPersistent) ->
+    with_msg_store_state(
+      MSCState, IsPersistent,
+      fun (MSCState1) -> rabbit_msg_store:close_all_indicated(MSCState1) end).
+
+%% Build the callback handed to the msg store (see
+%% msg_store_client_init/5) that closes fds inside a vqstate.
+msg_store_close_fds_fun(IsPersistent) ->
+    fun (?MODULE, State = #vqstate { msg_store_clients = MSCState }) ->
+            {ok, MSCState1} = msg_store_close_fds(MSCState, IsPersistent),
+            State #vqstate { msg_store_clients = MSCState1 }
+    end.
+
+%% Record a 'delivered' marker in the queue index, but only when asked
+%% to (first argument false is a no-op).
+maybe_write_delivered(false, _SeqId, IndexState) ->
+    IndexState;
+maybe_write_delivered(true, SeqId, IndexState) ->
+    rabbit_queue_index:deliver([SeqId], IndexState).
+
+%% Turn raw queue-index entries into a ?QUEUE of beta msg_statuses.
+%% Entries below TransientThreshold that are non-persistent are relics
+%% of a previous incarnation: they are not kept but scheduled for
+%% deliver+ack via DelsAndAcksFun. Entries already present in the
+%% pending-ack stores are skipped. Alongside the filtered queue this
+%% returns the RAM-ready count/bytes and transient count/bytes
+%% accumulated over the kept entries.
+betas_from_index_entries(List, TransientThreshold, DelsAndAcksFun, State) ->
+    {Filtered, Delivers, Acks, RamReadyCount, RamBytes, TransientCount, TransientBytes} =
+        lists:foldr(
+          fun ({_MsgOrId, SeqId, _MsgProps, IsPersistent, IsDelivered} = M,
+               {Filtered1, Delivers1, Acks1, RRC, RB, TC, TB} = Acc) ->
+                  case SeqId < TransientThreshold andalso not IsPersistent of
+                    true  -> {Filtered1,
+                              cons_if(not IsDelivered, SeqId, Delivers1),
+                              [SeqId | Acks1], RRC, RB, TC, TB};
+                    false -> MsgStatus = m(beta_msg_status(M)),
+                             HaveMsg = msg_in_ram(MsgStatus),
+                             Size = msg_size(MsgStatus),
+                             case is_msg_in_pending_acks(SeqId, State) of
+                               false -> {?QUEUE:in_r(MsgStatus, Filtered1),
+                                         Delivers1, Acks1,
+                                         RRC + one_if(HaveMsg),
+                                         RB + one_if(HaveMsg) * Size,
+                                         TC + one_if(not IsPersistent),
+                                         TB + one_if(not IsPersistent) * Size};
+                               true  -> Acc %% [0]
+                             end
+                  end
+          end, {?QUEUE:new(), [], [], 0, 0, 0, 0}, List),
+    {Filtered, RamReadyCount, RamBytes, DelsAndAcksFun(Delivers, Acks, State),
+     TransientCount, TransientBytes}.
+%% [0] We don't increase RamBytes here, even though it pertains to
+%% unacked messages too, since if HaveMsg then the message must have
+%% been stored in the QI, thus the message must have been in
+%% qi_pending_ack, thus it must already have been in RAM.
+
+%% True iff SeqId sits in any of the three pending-ack stores.
+is_msg_in_pending_acks(SeqId, #vqstate { ram_pending_ack  = RPA,
+                                         disk_pending_ack = DPA,
+                                         qi_pending_ack   = QPA }) ->
+    (gb_trees:is_defined(SeqId, RPA) orelse
+     gb_trees:is_defined(SeqId, DPA) orelse
+     gb_trees:is_defined(SeqId, QPA)).
+
+%% Grow the delta record to account for one more (on-disk) message at
+%% SeqId, extending its start or end bounds as needed and maintaining
+%% the transient count. Clauses: blank delta, SeqId before the start,
+%% SeqId at/after the end, SeqId inside the existing span.
+expand_delta(SeqId, ?BLANK_DELTA_PATTERN(X), IsPersistent) ->
+    d(#delta { start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1,
+               transient = one_if(not IsPersistent)});
+expand_delta(SeqId, #delta { start_seq_id = StartSeqId,
+                             count        = Count,
+                             transient    = Transient } = Delta,
+             IsPersistent )
+  when SeqId < StartSeqId ->
+    d(Delta #delta { start_seq_id = SeqId, count = Count + 1,
+                     transient = Transient + one_if(not IsPersistent)});
+expand_delta(SeqId, #delta { count        = Count,
+                             end_seq_id   = EndSeqId,
+                             transient    = Transient } = Delta,
+             IsPersistent)
+  when SeqId >= EndSeqId ->
+    d(Delta #delta { count = Count + 1, end_seq_id = SeqId + 1,
+                     transient = Transient + one_if(not IsPersistent)});
+expand_delta(_SeqId, #delta { count       = Count,
+                              transient   = Transient } = Delta,
+             IsPersistent ) ->
+    d(Delta #delta { count = Count + 1,
+                     transient = Transient + one_if(not IsPersistent) }).
+
+%%----------------------------------------------------------------------------
+%% Internal major helpers for Public API
+%%----------------------------------------------------------------------------
+
+%% Build the initial #vqstate after the queue index has been
+%% (re)initialised. Recovered persistent messages all start out in
+%% delta (on disk); counts/bytes come from the recovery terms when the
+%% shutdown was clean, otherwise from the index scan. Ends by pulling
+%% the first batch of messages from delta into q3 (and asserting
+%% invariants via a/1).
+init(IsDurable, IndexState, DeltaCount, DeltaBytes, Terms,
+     PersistentClient, TransientClient, VHost) ->
+    {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState),
+
+    {DeltaCount1, DeltaBytes1} =
+        case Terms of
+            non_clean_shutdown -> {DeltaCount, DeltaBytes};
+            _                  -> {proplists:get_value(persistent_count,
+                                                       Terms, DeltaCount),
+                                   proplists:get_value(persistent_bytes,
+                                                       Terms, DeltaBytes)}
+        end,
+    Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of
+                true  -> ?BLANK_DELTA;
+                false -> d(#delta { start_seq_id = LowSeqId,
+                                    count        = DeltaCount1,
+                                    transient    = 0,
+                                    end_seq_id   = NextSeqId })
+            end,
+    Now = erlang:monotonic_time(),
+    IoBatchSize = rabbit_misc:get_env(rabbit, msg_store_io_batch_size,
+                                      ?IO_BATCH_SIZE),
+
+    {ok, IndexMaxSize} = application:get_env(
+                           rabbit, queue_index_embed_msgs_below),
+    State = #vqstate {
+      q1                  = ?QUEUE:new(),
+      q2                  = ?QUEUE:new(),
+      delta               = Delta,
+      q3                  = ?QUEUE:new(),
+      q4                  = ?QUEUE:new(),
+      next_seq_id         = NextSeqId,
+      ram_pending_ack     = gb_trees:empty(),
+      disk_pending_ack    = gb_trees:empty(),
+      qi_pending_ack      = gb_trees:empty(),
+      index_state         = IndexState1,
+      msg_store_clients   = {PersistentClient, TransientClient},
+      durable             = IsDurable,
+      transient_threshold = NextSeqId,
+      qi_embed_msgs_below = IndexMaxSize,
+
+      len                 = DeltaCount1,
+      persistent_count    = DeltaCount1,
+      bytes               = DeltaBytes1,
+      persistent_bytes    = DeltaBytes1,
+      delta_transient_bytes = 0,
+
+      target_ram_count    = infinity,
+      ram_msg_count       = 0,
+      ram_msg_count_prev  = 0,
+      ram_ack_count_prev  = 0,
+      ram_bytes           = 0,
+      unacked_bytes       = 0,
+      out_counter         = 0,
+      in_counter          = 0,
+      rates               = blank_rates(Now),
+      msgs_on_disk        = gb_sets:new(),
+      msg_indices_on_disk = gb_sets:new(),
+      unconfirmed         = gb_sets:new(),
+      confirmed           = gb_sets:new(),
+      ack_out_counter     = 0,
+      ack_in_counter      = 0,
+      disk_read_count     = 0,
+      disk_write_count    = 0,
+
+      io_batch_size       = IoBatchSize,
+
+      mode                = default,
+      memory_reduction_run_count = 0,
+      virtual_host        = VHost},
+    a(maybe_deltas_to_betas(State)).
+
+%% Zeroed rate record stamped with the given (monotonic) time.
+blank_rates(Now) ->
+    #rates { in        = 0.0,
+             out       = 0.0,
+             ack_in    = 0.0,
+             ack_out   = 0.0,
+             timestamp = Now}.
+
+%% Put a message back at the *front* of the queue (e.g. after a
+%% predicate rejected it in collect_by_predicate/3). Default mode:
+%% body-less messages go to the front of q3 unless q4 is non-empty, in
+%% which case the body is read back in so ordering with q4 is kept.
+%% Lazy mode: if q3 is empty the message is pushed to disk and folded
+%% into delta, otherwise it goes to the front of q3.
+in_r(MsgStatus = #msg_status { msg = undefined },
+     State = #vqstate { mode = default, q3 = Q3, q4 = Q4 }) ->
+    case ?QUEUE:is_empty(Q4) of
+        true  -> State #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3) };
+        false -> {Msg, State1 = #vqstate { q4 = Q4a }} =
+                     read_msg(MsgStatus, State),
+                 MsgStatus1 = MsgStatus#msg_status{msg = Msg},
+                 stats(ready0, {MsgStatus, MsgStatus1}, 0,
+                       State1 #vqstate { q4 = ?QUEUE:in_r(MsgStatus1, Q4a) })
+    end;
+in_r(MsgStatus,
+     State = #vqstate { mode = default, q4 = Q4 }) ->
+    State #vqstate { q4 = ?QUEUE:in_r(MsgStatus, Q4) };
+%% lazy queues
+in_r(MsgStatus = #msg_status { seq_id = SeqId, is_persistent = IsPersistent },
+     State = #vqstate { mode = lazy, q3 = Q3, delta = Delta}) ->
+    case ?QUEUE:is_empty(Q3) of
+        true  ->
+            {_MsgStatus1, State1} =
+                maybe_write_to_disk(true, true, MsgStatus, State),
+            State2 = stats(ready0, {MsgStatus, none}, 1, State1),
+            Delta1 = expand_delta(SeqId, Delta, IsPersistent),
+            State2 #vqstate{ delta = Delta1};
+        false ->
+            State #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3) }
+    end.
+
+%% Take the next ready message: q4 first in default mode, falling back
+%% to (and in lazy mode always using) fetch_from_q3/1, which pages
+%% messages in from delta as needed. Returns {empty, State} or
+%% {{value, MsgStatus}, State}.
+queue_out(State = #vqstate { mode = default, q4 = Q4 }) ->
+    case ?QUEUE:out(Q4) of
+        {empty, _Q4} ->
+            case fetch_from_q3(State) of
+                {empty, _State1} = Result     -> Result;
+                {loaded, {MsgStatus, State1}} -> {{value, MsgStatus}, State1}
+            end;
+        {{value, MsgStatus}, Q4a} ->
+            {{value, MsgStatus}, State #vqstate { q4 = Q4a }}
+    end;
+%% lazy queues
+queue_out(State = #vqstate { mode = lazy }) ->
+    case fetch_from_q3(State) of
+        {empty, _State1} = Result     -> Result;
+        {loaded, {MsgStatus, State1}} -> {{value, MsgStatus}, State1}
+    end.
+
+%% Return the message body for a msg_status: fetch it from the msg
+%% store when not held in RAM, otherwise return it directly.
+read_msg(#msg_status{msg           = undefined,
+                     msg_id        = MsgId,
+                     is_persistent = IsPersistent}, State) ->
+    read_msg(MsgId, IsPersistent, State);
+read_msg(#msg_status{msg = Msg}, State) ->
+    {Msg, State}.
+
+%% Fetch a body from the appropriate msg store; asserts the read
+%% succeeds and bumps the disk_read_count stat.
+read_msg(MsgId, IsPersistent, State = #vqstate{msg_store_clients = MSCState,
+                                               disk_read_count   = Count}) ->
+    {{ok, Msg = #basic_message {}}, MSCState1} =
+        msg_store_read(MSCState, IsPersistent, MsgId),
+    {Msg, State #vqstate {msg_store_clients = MSCState1,
+                          disk_read_count   = Count + 1}}.
+
+%% Front end to stats0/4: normalises the sign spec and the
+%% before/after msg_status pair before updating the counters.
+stats(Signs, Statuses, DeltaPaged, State) ->
+    stats0(expand_signs(Signs), expand_statuses(Statuses), DeltaPaged, State).
+
+%% Normalise the shorthand sign specs used by stats/4 into
+%% {DeltaReady, DeltaUnacked, ReadyMsgPaged} triples.
+expand_signs(ready0)   -> {0, 0, true};
+expand_signs(lazy_pub) -> {1, 0, true};
+expand_signs({A, B})   -> {A, B, false}.
+
+%% Normalise the {Before, After} msg_status pair into
+%% {InRamBefore, InRamAfter, MsgStatus}; 'none'/'lazy' placeholders
+%% count as not-in-RAM.
+expand_statuses({none, A})    -> {false,         msg_in_ram(A), A};
+expand_statuses({B,    none}) -> {msg_in_ram(B), false,         B};
+expand_statuses({lazy, A})    -> {false        , false,         A};
+expand_statuses({B,    A})    -> {msg_in_ram(B), msg_in_ram(A), B}.
+
+%% In this function at least, we are religious: the variable name
+%% contains "Ready" or "Unacked" iff that is what it counts. If
+%% neither is present it counts both.
+stats0({DeltaReady, DeltaUnacked, ReadyMsgPaged},
+       {InRamBefore, InRamAfter, MsgStatus}, DeltaPaged,
+       State = #vqstate{len              = ReadyCount,
+                        bytes            = ReadyBytes,
+                        ram_msg_count    = RamReadyCount,
+                        persistent_count = PersistentCount,
+                        unacked_bytes    = UnackedBytes,
+                        ram_bytes        = RamBytes,
+                        delta_transient_bytes = DeltaBytes,
+                        persistent_bytes = PersistentBytes}) ->
+    S = msg_size(MsgStatus),
+    DeltaTotal = DeltaReady + DeltaUnacked,
+    DeltaRam = case {InRamBefore, InRamAfter} of
+                   {false, false} ->  0;
+                   {false, true}  ->  1;
+                   {true,  false} -> -1;
+                   {true,  true}  ->  0
+               end,
+    DeltaRamReady = case DeltaReady of
+                        1                    -> one_if(InRamAfter);
+                        -1                   -> -one_if(InRamBefore);
+                        0 when ReadyMsgPaged -> DeltaRam;
+                        0                    -> 0
+                    end,
+    DeltaPersistent = DeltaTotal * one_if(MsgStatus#msg_status.is_persistent),
+    State#vqstate{len              = ReadyCount      + DeltaReady,
+                  ram_msg_count    = RamReadyCount   + DeltaRamReady,
+                  persistent_count = PersistentCount + DeltaPersistent,
+                  bytes            = ReadyBytes      + DeltaReady       * S,
+                  unacked_bytes    = UnackedBytes    + DeltaUnacked     * S,
+                  ram_bytes        = RamBytes        + DeltaRam         * S,
+                  persistent_bytes = PersistentBytes + DeltaPersistent  * S,
+                  delta_transient_bytes = DeltaBytes + DeltaPaged * one_if(not MsgStatus#msg_status.is_persistent) * S}.
+
+%% Payload size in bytes, taken from the message properties.
+msg_size(#msg_status{msg_props = #message_properties{size = Size}}) -> Size.
+
+%% True iff the message body is held in RAM.
+msg_in_ram(#msg_status{msg = Msg}) -> Msg =/= undefined.
+
+%% first param: AckRequired
+%% Remove one message from the queue. With AckRequired = true the
+%% message is moved into the pending-ack store (marked delivered) and
+%% its SeqId is returned as the ack tag; with false it is discarded
+%% outright from the msg store and queue index. Both clauses bump
+%% out_counter and may refresh the rate averages.
+remove(true, MsgStatus = #msg_status {
+               seq_id        = SeqId,
+               is_delivered  = IsDelivered,
+               index_on_disk = IndexOnDisk },
+       State = #vqstate {out_counter       = OutCount,
+                         index_state       = IndexState}) ->
+    %% Mark it delivered if necessary
+    IndexState1 = maybe_write_delivered(
+                    IndexOnDisk andalso not IsDelivered,
+                    SeqId, IndexState),
+
+    State1 = record_pending_ack(
+               MsgStatus #msg_status {
+                 is_delivered = true }, State),
+
+    State2 = stats({-1, 1}, {MsgStatus, MsgStatus}, 0, State1),
+
+    {SeqId, maybe_update_rates(
+              State2 #vqstate {out_counter = OutCount + 1,
+                               index_state = IndexState1})};
+
+%% This function body has the same behaviour as remove_queue_entries/3
+%% but instead of removing messages based on a ?QUEUE, this removes
+%% just one message, the one referenced by the MsgStatus provided.
+remove(false, MsgStatus = #msg_status {
+                seq_id        = SeqId,
+                msg_id        = MsgId,
+                is_persistent = IsPersistent,
+                is_delivered  = IsDelivered,
+                msg_in_store  = MsgInStore,
+                index_on_disk = IndexOnDisk },
+       State = #vqstate {out_counter       = OutCount,
+                         index_state       = IndexState,
+                         msg_store_clients = MSCState}) ->
+    %% Mark it delivered if necessary
+    IndexState1 = maybe_write_delivered(
+                    IndexOnDisk andalso not IsDelivered,
+                    SeqId, IndexState),
+
+    %% Remove from msg_store and queue index, if necessary
+    case MsgInStore of
+        true  -> ok = msg_store_remove(MSCState, IsPersistent, [MsgId]);
+        false -> ok
+    end,
+
+    IndexState2 =
+        case IndexOnDisk of
+            true  -> rabbit_queue_index:ack([SeqId], IndexState1);
+            false -> IndexState1
+        end,
+
+    State1 = stats({-1, 0}, {MsgStatus, none}, 0, State),
+
+    {undefined, maybe_update_rates(
+                  State1 #vqstate {out_counter = OutCount + 1,
+                                   index_state = IndexState2})}.
+
+%% This function exists as a way to improve dropwhile/2
+%% performance. The idea of having this function is to optimise calls
+%% to rabbit_queue_index by batching delivers and acks, instead of
+%% sending them one by one.
+%%
+%% Instead of removing every message as their are popped from the
+%% queue, it first accumulates them and then removes them by calling
+%% remove_queue_entries/3, since the behaviour of
+%% remove_queue_entries/3 when used with
+%% process_delivers_and_acks_fun(deliver_and_ack) is the same as
+%% calling remove(false, MsgStatus, State).
+%%
+%% remove/3 also updates the out_counter in every call, but here we do
+%% it just once at the end.
+%% Returns {MsgPropsOfFirstKeptMsg | undefined, State}.
+remove_by_predicate(Pred, State = #vqstate {out_counter = OutCount}) ->
+    {MsgProps, QAcc, State1} =
+        collect_by_predicate(Pred, ?QUEUE:new(), State),
+    State2 =
+        remove_queue_entries(
+          QAcc, process_delivers_and_acks_fun(deliver_and_ack), State1),
+    %% maybe_update_rates/1 is called in remove/2 for every
+    %% message. Since we update out_counter only once, we call it just
+    %% there.
+    {MsgProps, maybe_update_rates(
+                 State2 #vqstate {
+                   out_counter = OutCount + ?QUEUE:len(QAcc)})}.
+
+%% This function exists as a way to improve fetchwhile/4
+%% performance. The idea of having this function is to optimise calls
+%% to rabbit_queue_index by batching delivers, instead of sending them
+%% one by one.
+%%
+%% Fun is the function passed to fetchwhile/4 that's
+%% applied to every fetched message and used to build the fetchwhile/4
+%% result accumulator FetchAcc.
+fetch_by_predicate(Pred, Fun, FetchAcc,
+                   State = #vqstate {
+                              index_state = IndexState,
+                              out_counter = OutCount}) ->
+    {MsgProps, QAcc, State1} =
+        collect_by_predicate(Pred, ?QUEUE:new(), State),
+
+    {Delivers, FetchAcc1, State2} =
+        process_queue_entries(QAcc, Fun, FetchAcc, State1),
+
+    IndexState1 = rabbit_queue_index:deliver(Delivers, IndexState),
+
+    {MsgProps, FetchAcc1, maybe_update_rates(
+                            State2 #vqstate {
+                              index_state = IndexState1,
+                              out_counter = OutCount + ?QUEUE:len(QAcc)})}.
+
+%% We try to do here the same as what remove(true, State) does but
+%% processing several messages at the same time. The idea is to
+%% optimize rabbit_queue_index:deliver/2 calls by sending a list of
+%% SeqIds instead of one by one, thus process_queue_entries1 will
+%% accumulate the required deliveries, will record_pending_ack for
+%% each message, and will update stats, like remove/2 does.
+%%
+%% For the meaning of Fun and FetchAcc arguments see
+%% fetch_by_predicate/4 above.
+process_queue_entries(Q, Fun, FetchAcc, State) ->
+    ?QUEUE:foldl(fun (MsgStatus, Acc) ->
+                         process_queue_entries1(MsgStatus, Fun, Acc)
+                 end,
+                 {[], FetchAcc, State}, Q).
+
+%% Per-message step for process_queue_entries/4: read the body, move
+%% the message to pending-ack (marked delivered), accumulate its SeqId
+%% for a batched deliver if needed, and fold it into FetchAcc.
+process_queue_entries1(
+  #msg_status { seq_id = SeqId, is_delivered = IsDelivered,
+                index_on_disk = IndexOnDisk} = MsgStatus,
+  Fun,
+  {Delivers, FetchAcc, State}) ->
+    {Msg, State1} = read_msg(MsgStatus, State),
+    State2 = record_pending_ack(
+               MsgStatus #msg_status {
+                 is_delivered = true }, State1),
+    {cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers),
+     Fun(Msg, SeqId, FetchAcc),
+     stats({-1, 1}, {MsgStatus, MsgStatus}, 0, State2)}.
+
+%% Pop messages while Pred(MsgProps) holds, accumulating them into
+%% QAcc. The first message failing the predicate is pushed back to
+%% the queue front (in_r/2) and its props returned; returns undefined
+%% props when the queue drained first.
+collect_by_predicate(Pred, QAcc, State) ->
+    case queue_out(State) of
+        {empty, State1} ->
+            {undefined, QAcc, State1};
+        {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} ->
+            case Pred(MsgProps) of
+                true  -> collect_by_predicate(Pred, ?QUEUE:in(MsgStatus, QAcc),
+                                              State1);
+                false -> {MsgProps, QAcc, in_r(MsgStatus, State1)}
+            end
+    end.
+
+%%----------------------------------------------------------------------------
+%% Helpers for Public API purge/1 function
+%%----------------------------------------------------------------------------
+
+%% The difference between purge_when_pending_acks/1
+%% vs. purge_and_index_reset/1 is that the first one issues a deliver
+%% and an ack to the queue index for every message that's being
+%% removed, while the later just resets the queue index state.
+purge_when_pending_acks(State) ->
+    State1 = purge1(process_delivers_and_acks_fun(deliver_and_ack), State),
+    a(State1).
+
+purge_and_index_reset(State) ->
+    State1 = purge1(process_delivers_and_acks_fun(none), State),
+    a(reset_qi_state(State1)).
+
+%% This function removes messages from each of {q1, q2, q3, q4}.
+%%
+%% With remove_queue_entries/3 q1 and q4 are emptied, while q2 and q3
+%% are specially handled by purge_betas_and_deltas/2.
+%%
+%% purge_betas_and_deltas/2 loads messages from the queue index,
+%% filling up q3 and in some cases moving messages form q2 to q3 while
+%% resetting q2 to an empty queue (see maybe_deltas_to_betas/2). The
+%% messages loaded into q3 are removed by calling
+%% remove_queue_entries/3 until there are no more messages to be read
+%% from the queue index. Messages are read in batches from the queue
+%% index.
+purge1(AfterFun, State = #vqstate { q4 = Q4}) ->
+    State1 = remove_queue_entries(Q4, AfterFun, State),
+
+    State2 = #vqstate {q1 = Q1} =
+        purge_betas_and_deltas(AfterFun, State1#vqstate{q4 = ?QUEUE:new()}),
+
+    State3 = remove_queue_entries(Q1, AfterFun, State2),
+
+    a(State3#vqstate{q1 = ?QUEUE:new()}).
+
+%% Throw away the queue-index state and start from a fresh one.
+reset_qi_state(State = #vqstate{index_state = IndexState}) ->
+    State#vqstate{index_state =
+                         rabbit_queue_index:reset_state(IndexState)}.
+
+%% True iff no messages are awaiting acknowledgement.
+is_pending_ack_empty(State) ->
+    count_pending_acks(State) =:= 0.
+
+%% True iff no publisher confirms are outstanding.
+is_unconfirmed_empty(#vqstate { unconfirmed = UC }) ->
+    gb_sets:is_empty(UC).
+
+%% Total messages across the three pending-ack stores.
+count_pending_acks(#vqstate { ram_pending_ack   = RPA,
+                              disk_pending_ack  = DPA,
+                              qi_pending_ack    = QPA }) ->
+    gb_trees:size(RPA) + gb_trees:size(DPA) + gb_trees:size(QPA).
+
+purge_betas_and_deltas(DelsAndAcksFun, State = #vqstate { mode = Mode }) ->
+ State0 = #vqstate { q3 = Q3 } =
+ case Mode of
+ lazy -> maybe_deltas_to_betas(DelsAndAcksFun, State);
+ _ -> State
+ end,
+
+ case ?QUEUE:is_empty(Q3) of
+ true -> State0;
+ false -> State1 = remove_queue_entries(Q3, DelsAndAcksFun, State0),
+ purge_betas_and_deltas(DelsAndAcksFun,
+ maybe_deltas_to_betas(
+ DelsAndAcksFun,
+ State1#vqstate{q3 = ?QUEUE:new()}))
+ end.
+
+remove_queue_entries(Q, DelsAndAcksFun,
+ State = #vqstate{msg_store_clients = MSCState}) ->
+ {MsgIdsByStore, Delivers, Acks, State1} =
+ ?QUEUE:foldl(fun remove_queue_entries1/2,
+ {maps:new(), [], [], State}, Q),
+ remove_msgs_by_id(MsgIdsByStore, MSCState),
+ DelsAndAcksFun(Delivers, Acks, State1).
+
+%% Fold step for remove_queue_entries/3. Classifies one msg_status
+%% into the accumulator: its message id grouped by store (only when
+%% the payload is in a store), its seq-id as a deliver (only when
+%% indexed and not yet delivered) and as an ack (only when indexed),
+%% while updating the queue stats for the removal.
+remove_queue_entries1(
+ #msg_status { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered,
+ msg_in_store = MsgInStore, index_on_disk = IndexOnDisk,
+ is_persistent = IsPersistent} = MsgStatus,
+ {MsgIdsByStore, Delivers, Acks, State}) ->
+ {case MsgInStore of
+ true -> rabbit_misc:maps_cons(IsPersistent, MsgId, MsgIdsByStore);
+ false -> MsgIdsByStore
+ end,
+ cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers),
+ cons_if(IndexOnDisk, SeqId, Acks),
+ stats({-1, 0}, {MsgStatus, none}, 0, State)}.
+
+%% Build the continuation applied after removing messages: for
+%% deliver_and_ack it records the delivers and then the acks in the
+%% queue index; for anything else it leaves the state untouched.
+process_delivers_and_acks_fun(deliver_and_ack) ->
+ fun (Delivers, Acks, State = #vqstate { index_state = IndexState }) ->
+ IndexState1 =
+ rabbit_queue_index:ack(
+ Acks, rabbit_queue_index:deliver(Delivers, IndexState)),
+ State #vqstate { index_state = IndexState1 }
+ end;
+process_delivers_and_acks_fun(_) ->
+ fun (_, _, State) ->
+ State
+ end.
+
+%%----------------------------------------------------------------------------
+%% Internal gubbins for publishing
+%%----------------------------------------------------------------------------
+
+%% Publish one message. Default mode: the message is (possibly)
+%% persisted via PersistFun with both force flags false, then queued
+%% in RAM -- q1 when q3 is non-empty, q4 otherwise. Lazy mode:
+%% PersistFun is called with both force flags true and the message is
+%% only accounted for in delta, not kept in RAM. In both modes
+%% next_seq_id/in_counter advance and, when NeedsConfirming, the id
+%% joins the unconfirmed set.
+publish1(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId },
+ MsgProps = #message_properties { needs_confirming = NeedsConfirming },
+ IsDelivered, _ChPid, _Flow, PersistFun,
+ State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4,
+ mode = default,
+ qi_embed_msgs_below = IndexMaxSize,
+ next_seq_id = SeqId,
+ in_counter = InCount,
+ durable = IsDurable,
+ unconfirmed = UC }) ->
+ %% a message is only treated as persistent if the queue itself is
+ %% durable
+ IsPersistent1 = IsDurable andalso IsPersistent,
+ MsgStatus = msg_status(IsPersistent1, IsDelivered, SeqId, Msg, MsgProps, IndexMaxSize),
+ {MsgStatus1, State1} = PersistFun(false, false, MsgStatus, State),
+ State2 = case ?QUEUE:is_empty(Q3) of
+ false -> State1 #vqstate { q1 = ?QUEUE:in(m(MsgStatus1), Q1) };
+ true -> State1 #vqstate { q4 = ?QUEUE:in(m(MsgStatus1), Q4) }
+ end,
+ InCount1 = InCount + 1,
+ UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
+ stats({1, 0}, {none, MsgStatus1}, 0,
+ State2#vqstate{ next_seq_id = SeqId + 1,
+ in_counter = InCount1,
+ unconfirmed = UC1 });
+publish1(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId },
+ MsgProps = #message_properties { needs_confirming = NeedsConfirming },
+ IsDelivered, _ChPid, _Flow, PersistFun,
+ State = #vqstate { mode = lazy,
+ qi_embed_msgs_below = IndexMaxSize,
+ next_seq_id = SeqId,
+ in_counter = InCount,
+ durable = IsDurable,
+ unconfirmed = UC,
+ delta = Delta}) ->
+ IsPersistent1 = IsDurable andalso IsPersistent,
+ MsgStatus = msg_status(IsPersistent1, IsDelivered, SeqId, Msg, MsgProps, IndexMaxSize),
+ {MsgStatus1, State1} = PersistFun(true, true, MsgStatus, State),
+ Delta1 = expand_delta(SeqId, Delta, IsPersistent),
+ UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
+ stats(lazy_pub, {lazy, m(MsgStatus1)}, 1,
+ State1#vqstate{ delta = Delta1,
+ next_seq_id = SeqId + 1,
+ in_counter = InCount + 1,
+ unconfirmed = UC1}).
+
+%% Fold step for batch publishing: publish one {Msg, Props, Delivered}
+%% triple using the batched (pre_publish) index writer.
+batch_publish1({Msg, MsgProps, IsDelivered}, {ChPid, Flow, State}) ->
+ {ChPid, Flow, publish1(Msg, MsgProps, IsDelivered, ChPid, Flow,
+ fun maybe_prepare_write_to_disk/4, State)}.
+
+%% Publish a message that is considered delivered immediately: instead
+%% of being enqueued it is recorded as a pending ack, and its SeqId is
+%% returned so the caller can ack it later. Default mode persists via
+%% PersistFun(false, false, ...); lazy mode forces both writes with
+%% PersistFun(true, true, ...). Counters and the unconfirmed set are
+%% updated as in publish1/7.
+publish_delivered1(Msg = #basic_message { is_persistent = IsPersistent,
+ id = MsgId },
+ MsgProps = #message_properties {
+ needs_confirming = NeedsConfirming },
+ _ChPid, _Flow, PersistFun,
+ State = #vqstate { mode = default,
+ qi_embed_msgs_below = IndexMaxSize,
+ next_seq_id = SeqId,
+ out_counter = OutCount,
+ in_counter = InCount,
+ durable = IsDurable,
+ unconfirmed = UC }) ->
+ IsPersistent1 = IsDurable andalso IsPersistent,
+ MsgStatus = msg_status(IsPersistent1, true, SeqId, Msg, MsgProps, IndexMaxSize),
+ {MsgStatus1, State1} = PersistFun(false, false, MsgStatus, State),
+ State2 = record_pending_ack(m(MsgStatus1), State1),
+ UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
+ State3 = stats({0, 1}, {none, MsgStatus1}, 0,
+ State2 #vqstate { next_seq_id = SeqId + 1,
+ out_counter = OutCount + 1,
+ in_counter = InCount + 1,
+ unconfirmed = UC1 }),
+ {SeqId, State3};
+publish_delivered1(Msg = #basic_message { is_persistent = IsPersistent,
+ id = MsgId },
+ MsgProps = #message_properties {
+ needs_confirming = NeedsConfirming },
+ _ChPid, _Flow, PersistFun,
+ State = #vqstate { mode = lazy,
+ qi_embed_msgs_below = IndexMaxSize,
+ next_seq_id = SeqId,
+ out_counter = OutCount,
+ in_counter = InCount,
+ durable = IsDurable,
+ unconfirmed = UC }) ->
+ IsPersistent1 = IsDurable andalso IsPersistent,
+ MsgStatus = msg_status(IsPersistent1, true, SeqId, Msg, MsgProps, IndexMaxSize),
+ {MsgStatus1, State1} = PersistFun(true, true, MsgStatus, State),
+ State2 = record_pending_ack(m(MsgStatus1), State1),
+ UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
+ State3 = stats({0, 1}, {none, MsgStatus1}, 0,
+ State2 #vqstate { next_seq_id = SeqId + 1,
+ out_counter = OutCount + 1,
+ in_counter = InCount + 1,
+ unconfirmed = UC1 }),
+ {SeqId, State3}.
+
+%% Fold step for batch publish-delivered: publishes one message with
+%% the batched index writer, accumulating the returned SeqIds.
+batch_publish_delivered1({Msg, MsgProps}, {ChPid, Flow, SeqIds, State}) ->
+ {SeqId, State1} =
+ publish_delivered1(Msg, MsgProps, ChPid, Flow,
+ fun maybe_prepare_write_to_disk/4,
+ State),
+ {ChPid, Flow, [SeqId | SeqIds], State1}.
+
+%% Write the message payload to the message store unless it is already
+%% there (msg_in_store = true) or the write is neither forced nor the
+%% message persistent. Messages destined for the queue index
+%% (persist_to/1 =:= queue_index) are left untouched here -- their
+%% payload is written by the index-writing functions instead.
+maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status {
+ msg_in_store = true }, State) ->
+ {MsgStatus, State};
+maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
+ msg = Msg, msg_id = MsgId,
+ is_persistent = IsPersistent },
+ State = #vqstate{ msg_store_clients = MSCState,
+ disk_write_count = Count})
+ when Force orelse IsPersistent ->
+ case persist_to(MsgStatus) of
+ msg_store -> ok = msg_store_write(MSCState, IsPersistent, MsgId,
+ prepare_to_store(Msg)),
+ {MsgStatus#msg_status{msg_in_store = true},
+ State#vqstate{disk_write_count = Count + 1}};
+ queue_index -> {MsgStatus, State}
+ end;
+maybe_write_msg_to_disk(_Force, MsgStatus, State) ->
+ {MsgStatus, State}.
+
+%% Due to certain optimisations made inside
+%% rabbit_queue_index:pre_publish/7 we need to have two separate
+%% functions for index persistence. This one is only used when paging
+%% during memory pressure. We didn't want to modify
+%% maybe_write_index_to_disk/3 because that function is used in other
+%% places.
+%%
+%% Writes the queue-index entry (via the batching pre_publish API)
+%% unless it is already on disk, or neither forced nor persistent.
+%% For index-embedded messages the payload itself is written here and
+%% counted as a disk write; for store-bound ones only the id is.
+maybe_batch_write_index_to_disk(_Force,
+ MsgStatus = #msg_status {
+ index_on_disk = true }, State) ->
+ {MsgStatus, State};
+maybe_batch_write_index_to_disk(Force,
+ MsgStatus = #msg_status {
+ msg = Msg,
+ msg_id = MsgId,
+ seq_id = SeqId,
+ is_persistent = IsPersistent,
+ is_delivered = IsDelivered,
+ msg_props = MsgProps},
+ State = #vqstate {
+ target_ram_count = TargetRamCount,
+ disk_write_count = DiskWriteCount,
+ index_state = IndexState})
+ when Force orelse IsPersistent ->
+ {MsgOrId, DiskWriteCount1} =
+ case persist_to(MsgStatus) of
+ msg_store -> {MsgId, DiskWriteCount};
+ queue_index -> {prepare_to_store(Msg), DiskWriteCount + 1}
+ end,
+ IndexState1 = rabbit_queue_index:pre_publish(
+ MsgOrId, SeqId, MsgProps, IsPersistent, IsDelivered,
+ TargetRamCount, IndexState),
+ {MsgStatus#msg_status{index_on_disk = true},
+ State#vqstate{index_state = IndexState1,
+ disk_write_count = DiskWriteCount1}};
+maybe_batch_write_index_to_disk(_Force, MsgStatus, State) ->
+ {MsgStatus, State}.
+
+%% Write the queue-index entry for MsgStatus unless it is already on
+%% disk, or the write is neither forced nor the message persistent.
+%% For index-embedded messages the payload itself goes into the index
+%% (counted as a disk write); for store-bound ones only the id is
+%% recorded. A 'delivered' marker is also written when the message
+%% was already delivered. Non-batched counterpart of
+%% maybe_batch_write_index_to_disk/3.
+maybe_write_index_to_disk(_Force, MsgStatus = #msg_status {
+                                    index_on_disk = true }, State) ->
+    {MsgStatus, State};
+maybe_write_index_to_disk(Force, MsgStatus = #msg_status {
+                                   msg           = Msg,
+                                   msg_id        = MsgId,
+                                   seq_id        = SeqId,
+                                   is_persistent = IsPersistent,
+                                   is_delivered  = IsDelivered,
+                                   msg_props     = MsgProps},
+                          State = #vqstate{target_ram_count = TargetRamCount,
+                                           disk_write_count = DiskWriteCount,
+                                           index_state      = IndexState})
+  when Force orelse IsPersistent ->
+    {MsgOrId, DiskWriteCount1} =
+        case persist_to(MsgStatus) of
+            msg_store   -> {MsgId, DiskWriteCount};
+            queue_index -> {prepare_to_store(Msg), DiskWriteCount + 1}
+        end,
+    IndexState1 = rabbit_queue_index:publish(
+                    MsgOrId, SeqId, MsgProps, IsPersistent, TargetRamCount,
+                    IndexState),
+    IndexState2 = maybe_write_delivered(IsDelivered, SeqId, IndexState1),
+    {MsgStatus#msg_status{index_on_disk = true},
+     State#vqstate{index_state      = IndexState2,
+                   disk_write_count = DiskWriteCount1}};
+maybe_write_index_to_disk(_Force, MsgStatus, State) ->
+    {MsgStatus, State}.
+
+%% Possibly write both the payload (message store) and the index entry
+%% for MsgStatus, using the non-batched index writer.
+maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) ->
+ {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State),
+ maybe_write_index_to_disk(ForceIndex, MsgStatus1, State1).
+
+%% As maybe_write_to_disk/4, but using the batched (pre_publish)
+%% index writer; used when publishing in batches and when paging.
+maybe_prepare_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) ->
+ {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State),
+ maybe_batch_write_index_to_disk(ForceIndex, MsgStatus1, State1).
+
+%% Decide where a message's payload should live: the shared message
+%% store, or embedded in the queue index. Messages whose estimated
+%% total size (body plus encoded -- or guessed -- properties) reaches
+%% IndexMaxSize (qi_embed_msgs_below) go to the message store.
+determine_persist_to(#basic_message{
+                        content = #content{properties     = Props,
+                                           properties_bin = PropsBin}},
+                     #message_properties{size = BodySize},
+                     IndexMaxSize) ->
+    %% The >= is so that you can set the env to 0 and never persist
+    %% to the index.
+    %%
+    %% We want this to be fast, so we avoid size(term_to_binary())
+    %% here, or using the term size estimation from truncate.erl, both
+    %% of which are too slow. So instead, if the message body size
+    %% goes over the limit then we avoid any other checks.
+    %%
+    %% If it doesn't we need to decide if the properties will push
+    %% it past the limit. If we have the encoded properties (usual
+    %% case) we can just check their size. If we don't (message came
+    %% via the direct client), we make a guess based on the number of
+    %% headers.
+    case BodySize >= IndexMaxSize of
+        true  -> msg_store;
+        false -> %% byte_size/1 rather than the generic size/1: the
+                 %% is_binary/1 test guarantees a binary here.
+                 Est = case is_binary(PropsBin) of
+                           true  -> BodySize + byte_size(PropsBin);
+                           false -> #'P_basic'{headers = Hs} = Props,
+                                    case Hs of
+                                        undefined -> 0;
+                                        _         -> length(Hs)
+                                    end * ?HEADER_GUESS_SIZE + BodySize
+                       end,
+                 case Est >= IndexMaxSize of
+                     true  -> msg_store;
+                     false -> queue_index
+                 end
+    end.
+
+persist_to(#msg_status{persist_to = To}) -> To.
+
+%% Strip the recoverable decoded properties from a message's content
+%% before it is written to disk, to avoid persisting data that can be
+%% reconstructed from the encoded form.
+prepare_to_store(Msg) ->
+ Msg#basic_message{
+ %% don't persist any recoverable decoded properties
+ content = rabbit_binary_parser:clear_decoded_content(
+ Msg #basic_message.content)}.
+
+%%----------------------------------------------------------------------------
+%% Internal gubbins for acks
+%%----------------------------------------------------------------------------
+
+%% Record MsgStatus as awaiting acknowledgement, keyed by SeqId, in
+%% one of the three pending-ack trees: DPA when the message is not in
+%% RAM; otherwise QPA for index-embedded messages and RPA for
+%% store-bound ones. Also bumps ack_in_counter.
+record_pending_ack(#msg_status { seq_id = SeqId } = MsgStatus,
+ State = #vqstate { ram_pending_ack = RPA,
+ disk_pending_ack = DPA,
+ qi_pending_ack = QPA,
+ ack_in_counter = AckInCount}) ->
+ Insert = fun (Tree) -> gb_trees:insert(SeqId, MsgStatus, Tree) end,
+ {RPA1, DPA1, QPA1} =
+ case {msg_in_ram(MsgStatus), persist_to(MsgStatus)} of
+ {false, _} -> {RPA, Insert(DPA), QPA};
+ {_, queue_index} -> {RPA, DPA, Insert(QPA)};
+ {_, msg_store} -> {Insert(RPA), DPA, QPA}
+ end,
+ State #vqstate { ram_pending_ack = RPA1,
+ disk_pending_ack = DPA1,
+ qi_pending_ack = QPA1,
+ ack_in_counter = AckInCount + 1}.
+
+%% Fetch the pending-ack msg_status for SeqId, trying the RAM tree,
+%% then the disk tree, then the queue-index tree. The final
+%% gb_trees:get/2 crashes if SeqId is in none of them -- callers are
+%% expected to pass a known-pending SeqId.
+lookup_pending_ack(SeqId, #vqstate { ram_pending_ack = RPA,
+ disk_pending_ack = DPA,
+ qi_pending_ack = QPA}) ->
+ case gb_trees:lookup(SeqId, RPA) of
+ {value, V} -> V;
+ none -> case gb_trees:lookup(SeqId, DPA) of
+ {value, V} -> V;
+ none -> gb_trees:get(SeqId, QPA)
+ end
+ end.
+
+%% First parameter = UpdateStats
+%%
+%% Remove SeqId from whichever pending-ack tree holds it, returning
+%% {MsgStatus, State1}, or {none, State} when SeqId is not pending.
+%% With UpdateStats = true the removal is also reflected in the
+%% queue stats.
+remove_pending_ack(true, SeqId, State) ->
+ case remove_pending_ack(false, SeqId, State) of
+ {none, _} ->
+ {none, State};
+ {MsgStatus, State1} ->
+ {MsgStatus, stats({0, -1}, {MsgStatus, none}, 0, State1)}
+ end;
+remove_pending_ack(false, SeqId, State = #vqstate{ram_pending_ack = RPA,
+ disk_pending_ack = DPA,
+ qi_pending_ack = QPA}) ->
+ case gb_trees:lookup(SeqId, RPA) of
+ {value, V} -> RPA1 = gb_trees:delete(SeqId, RPA),
+ {V, State #vqstate { ram_pending_ack = RPA1 }};
+ none -> case gb_trees:lookup(SeqId, DPA) of
+ {value, V} ->
+ DPA1 = gb_trees:delete(SeqId, DPA),
+ {V, State#vqstate{disk_pending_ack = DPA1}};
+ none ->
+ case gb_trees:lookup(SeqId, QPA) of
+ {value, V} ->
+ QPA1 = gb_trees:delete(SeqId, QPA),
+ {V, State#vqstate{qi_pending_ack = QPA1}};
+ none ->
+ {none, State}
+ end
+ end
+ end.
+
+%% Drop all pending acks. With KeepPersistent = true only transient
+%% messages are removed from the store and no index acks are written;
+%% with false, every indexed seq-id is acked in the queue index and
+%% all message ids are removed from their stores.
+purge_pending_ack(KeepPersistent,
+ State = #vqstate { index_state = IndexState,
+ msg_store_clients = MSCState }) ->
+ {IndexOnDiskSeqIds, MsgIdsByStore, State1} = purge_pending_ack1(State),
+ case KeepPersistent of
+ true -> remove_transient_msgs_by_id(MsgIdsByStore, MSCState),
+ State1;
+ false -> IndexState1 =
+ rabbit_queue_index:ack(IndexOnDiskSeqIds, IndexState),
+ remove_msgs_by_id(MsgIdsByStore, MSCState),
+ State1 #vqstate { index_state = IndexState1 }
+ end.
+
+%% Variant of purge_pending_ack/2 for queue deletion: the whole queue
+%% index is deleted, so the individual indexed seq-ids are ignored
+%% and only the message stores need explicit removals.
+purge_pending_ack_delete_and_terminate(
+ State = #vqstate { index_state = IndexState,
+ msg_store_clients = MSCState }) ->
+ {_, MsgIdsByStore, State1} = purge_pending_ack1(State),
+ IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState),
+ remove_msgs_by_id(MsgIdsByStore, MSCState),
+ State1 #vqstate { index_state = IndexState1 }.
+
+%% Accumulate acks (via accumulate_ack/2) from all three pending-ack
+%% trees and reset them to empty, returning the indexed seq-ids and
+%% the message ids grouped by store alongside the new state.
+purge_pending_ack1(State = #vqstate { ram_pending_ack = RPA,
+ disk_pending_ack = DPA,
+ qi_pending_ack = QPA }) ->
+ F = fun (_SeqId, MsgStatus, Acc) -> accumulate_ack(MsgStatus, Acc) end,
+ {IndexOnDiskSeqIds, MsgIdsByStore, _AllMsgIds} =
+ rabbit_misc:gb_trees_fold(
+ F, rabbit_misc:gb_trees_fold(
+ F, rabbit_misc:gb_trees_fold(
+ F, accumulate_ack_init(), RPA), DPA), QPA),
+ State1 = State #vqstate { ram_pending_ack = gb_trees:empty(),
+ disk_pending_ack = gb_trees:empty(),
+ qi_pending_ack = gb_trees:empty()},
+ {IndexOnDiskSeqIds, MsgIdsByStore, State1}.
+
+%% MsgIdsByStore is a map with two keys:
+%%
+%% true: holds a list of Persistent Message Ids.
+%% false: holds a list of Transient Message Ids.
+%%
+%% When we call maps:to_list/1 we get two sets of msg ids, where
+%% IsPersistent is either true for persistent messages or false for
+%% transient ones. The msg_store_remove/3 function takes this boolean
+%% flag to determine the store from which the messages should be
+%% removed.
+%% Remove every accumulated message id from its store, persistent and
+%% transient alike (see the comment above for the map layout).
+remove_msgs_by_id(MsgIdsByStore, MSCState) ->
+ [ok = msg_store_remove(MSCState, IsPersistent, MsgIds)
+ || {IsPersistent, MsgIds} <- maps:to_list(MsgIdsByStore)].
+
+%% Remove only the transient message ids (key 'false') from their
+%% store; persistent messages are left untouched.
+remove_transient_msgs_by_id(MsgIdsByStore, MSCState) ->
+    case maps:find(false, MsgIdsByStore) of
+        {ok, TransientIds} -> ok = msg_store_remove(MSCState, false,
+                                                    TransientIds);
+        error              -> ok
+    end.
+
+accumulate_ack_init() -> {[], maps:new(), []}.
+
+%% Fold one pending-ack msg_status into the accumulator created by
+%% accumulate_ack_init/0: its seq-id (only when indexed on disk), its
+%% message id grouped by store (only when the payload is in a store),
+%% and its message id unconditionally.
+accumulate_ack(#msg_status { seq_id = SeqId,
+ msg_id = MsgId,
+ is_persistent = IsPersistent,
+ msg_in_store = MsgInStore,
+ index_on_disk = IndexOnDisk },
+ {IndexOnDiskSeqIdsAcc, MsgIdsByStore, AllMsgIds}) ->
+ {cons_if(IndexOnDisk, SeqId, IndexOnDiskSeqIdsAcc),
+ case MsgInStore of
+ true -> rabbit_misc:maps_cons(IsPersistent, MsgId, MsgIdsByStore);
+ false -> MsgIdsByStore
+ end,
+ [MsgId | AllMsgIds]}.
+
+%%----------------------------------------------------------------------------
+%% Internal plumbing for confirms (aka publisher acks)
+%%----------------------------------------------------------------------------
+
+%% Mark MsgIdSet as confirmed: remove the ids from the on-disk
+%% tracking sets and the unconfirmed set, and add them to 'confirmed'.
+record_confirms(MsgIdSet, State = #vqstate { msgs_on_disk = MOD,
+ msg_indices_on_disk = MIOD,
+ unconfirmed = UC,
+ confirmed = C }) ->
+ State #vqstate {
+ msgs_on_disk = rabbit_misc:gb_sets_difference(MOD, MsgIdSet),
+ msg_indices_on_disk = rabbit_misc:gb_sets_difference(MIOD, MsgIdSet),
+ unconfirmed = rabbit_misc:gb_sets_difference(UC, MsgIdSet),
+ confirmed = gb_sets:union(C, MsgIdSet) }.
+
+%% Message-store write notification. 'ignored' confirms the whole set
+%% immediately; 'written' records the (still unconfirmed) ids in
+%% msgs_on_disk and confirms only those whose index entry is already
+%% on disk. The work runs inside the queue process via Callback.
+msgs_written_to_disk(Callback, MsgIdSet, ignored) ->
+ Callback(?MODULE,
+ fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end);
+msgs_written_to_disk(Callback, MsgIdSet, written) ->
+ Callback(?MODULE,
+ fun (?MODULE, State = #vqstate { msgs_on_disk = MOD,
+ msg_indices_on_disk = MIOD,
+ unconfirmed = UC }) ->
+ Confirmed = gb_sets:intersection(UC, MsgIdSet),
+ record_confirms(gb_sets:intersection(MsgIdSet, MIOD),
+ State #vqstate {
+ msgs_on_disk =
+ gb_sets:union(MOD, Confirmed) })
+ end).
+
+%% Queue-index write notification, mirror image of
+%% msgs_written_to_disk/3: records the unconfirmed ids in
+%% msg_indices_on_disk and confirms those whose payload is already in
+%% a message store.
+msg_indices_written_to_disk(Callback, MsgIdSet) ->
+ Callback(?MODULE,
+ fun (?MODULE, State = #vqstate { msgs_on_disk = MOD,
+ msg_indices_on_disk = MIOD,
+ unconfirmed = UC }) ->
+ Confirmed = gb_sets:intersection(UC, MsgIdSet),
+ record_confirms(gb_sets:intersection(MsgIdSet, MOD),
+ State #vqstate {
+ msg_indices_on_disk =
+ gb_sets:union(MIOD, Confirmed) })
+ end).
+
+%% Notification that both payload and index entry are on disk:
+%% confirm the whole set at once.
+msgs_and_indices_written_to_disk(Callback, MsgIdSet) ->
+ Callback(?MODULE,
+ fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end).
+
+%%----------------------------------------------------------------------------
+%% Internal plumbing for requeue
+%%----------------------------------------------------------------------------
+
+%% Requeue helper: turn MsgStatus back into an alpha (message payload
+%% in RAM), reading the payload from disk if necessary, and count it
+%% as ready again in the stats.
+publish_alpha(#msg_status { msg = undefined } = MsgStatus, State) ->
+ {Msg, State1} = read_msg(MsgStatus, State),
+ MsgStatus1 = MsgStatus#msg_status { msg = Msg },
+ {MsgStatus1, stats({1, -1}, {MsgStatus, MsgStatus1}, 0, State1)};
+publish_alpha(MsgStatus, State) ->
+ {MsgStatus, stats({1, -1}, {MsgStatus, MsgStatus}, 0, State)}.
+
+%% Requeue helper: turn MsgStatus into a beta -- ensure it is written
+%% out via the batched index writer, trim the in-RAM payload, and
+%% count it as ready again in the stats.
+publish_beta(MsgStatus, State) ->
+ {MsgStatus1, State1} = maybe_prepare_write_to_disk(true, false, MsgStatus, State),
+ MsgStatus2 = m(trim_msg_status(MsgStatus1)),
+ {MsgStatus2, stats({1, -1}, {MsgStatus, MsgStatus2}, 0, State1)}.
+
+%% Rebuild queue, inserting sequence ids to maintain ordering
+%% Entry point: seeds the empty accumulator queue (Front) for
+%% queue_merge/7.
+queue_merge(SeqIds, Q, MsgIds, Limit, PubFun, State) ->
+ queue_merge(SeqIds, Q, ?QUEUE:new(), MsgIds,
+ Limit, PubFun, State).
+
+%% Merge the (ordered) requeued SeqIds into queue Q, keeping seq-id
+%% order: existing entries with smaller seq-ids are moved onto Front
+%% first; each SeqId below Limit is resolved from the pending acks
+%% (skipped if no longer pending) and republished via PubFun. Returns
+%% the unconsumed SeqIds, the merged queue, accumulated MsgIds and
+%% the new state.
+queue_merge([SeqId | Rest] = SeqIds, Q, Front, MsgIds,
+ Limit, PubFun, State)
+ when Limit == undefined orelse SeqId < Limit ->
+ case ?QUEUE:out(Q) of
+ {{value, #msg_status { seq_id = SeqIdQ } = MsgStatus}, Q1}
+ when SeqIdQ < SeqId ->
+ %% enqueue from the remaining queue
+ queue_merge(SeqIds, Q1, ?QUEUE:in(MsgStatus, Front), MsgIds,
+ Limit, PubFun, State);
+ {_, _Q1} ->
+ %% enqueue from the remaining list of sequence ids
+ case msg_from_pending_ack(SeqId, State) of
+ {none, _} ->
+ queue_merge(Rest, Q, Front, MsgIds, Limit, PubFun, State);
+ {MsgStatus, State1} ->
+ {#msg_status { msg_id = MsgId } = MsgStatus1, State2} =
+ PubFun(MsgStatus, State1),
+ queue_merge(Rest, Q, ?QUEUE:in(MsgStatus1, Front), [MsgId | MsgIds],
+ Limit, PubFun, State2)
+ end
+ end;
+queue_merge(SeqIds, Q, Front, MsgIds,
+ _Limit, _PubFun, State) ->
+ {SeqIds, ?QUEUE:join(Front, Q), MsgIds, State}.
+
+%% Requeue acked SeqIds directly into delta: each one still pending
+%% is written out fully (payload and index, forced) and accounted
+%% back into the ready set; ids no longer pending are skipped.
+delta_merge([], Delta, MsgIds, State) ->
+ {Delta, MsgIds, State};
+delta_merge(SeqIds, Delta, MsgIds, State) ->
+ lists:foldl(fun (SeqId, {Delta0, MsgIds0, State0} = Acc) ->
+ case msg_from_pending_ack(SeqId, State0) of
+ {none, _} ->
+ Acc;
+ {#msg_status { msg_id = MsgId,
+ is_persistent = IsPersistent } = MsgStatus, State1} ->
+ {_MsgStatus, State2} =
+ maybe_prepare_write_to_disk(true, true, MsgStatus, State1),
+ {expand_delta(SeqId, Delta0, IsPersistent), [MsgId | MsgIds0],
+ stats({1, -1}, {MsgStatus, none}, 1, State2)}
+ end
+ end, {Delta, MsgIds, State}, SeqIds).
+
+%% Mostly opposite of record_pending_ack/2
+%%
+%% Removes SeqId from the pending-ack trees and returns its
+%% msg_status with the needs_confirming flag cleared; {none, State}
+%% when SeqId is not pending.
+msg_from_pending_ack(SeqId, State) ->
+ case remove_pending_ack(false, SeqId, State) of
+ {none, _} ->
+ {none, State};
+ {#msg_status { msg_props = MsgProps } = MsgStatus, State1} ->
+ {MsgStatus #msg_status {
+ msg_props = MsgProps #message_properties { needs_confirming = false } },
+ State1}
+ end.
+
+%% Seq id at the head of queue Q, or 'undefined' when Q is empty.
+beta_limit(Q) ->
+ case ?QUEUE:peek(Q) of
+ {value, #msg_status { seq_id = SeqId }} -> SeqId;
+ empty -> undefined
+ end.
+
+%% First seq id covered by Delta, or 'undefined' for a blank delta.
+delta_limit(?BLANK_DELTA_PATTERN(_X)) -> undefined;
+delta_limit(#delta { start_seq_id = StartSeqId }) -> StartSeqId.
+
+%%----------------------------------------------------------------------------
+%% Iterator
+%%----------------------------------------------------------------------------
+
+%% Iterator over the RAM pending-ack tree, tagged 'ack' for next/2.
+ram_ack_iterator(State) ->
+ {ack, gb_trees:iterator(State#vqstate.ram_pending_ack)}.
+
+%% Iterator over the disk pending-ack tree, tagged 'ack' for next/2.
+disk_ack_iterator(State) ->
+ {ack, gb_trees:iterator(State#vqstate.disk_pending_ack)}.
+
+%% Iterator over the queue-index pending-ack tree, tagged 'ack'.
+qi_ack_iterator(State) ->
+ {ack, gb_trees:iterator(State#vqstate.qi_pending_ack)}.
+
+msg_iterator(State) -> istate(start, State).
+
+%% State machine driving msg_iterator/1: visit q4, q3, delta, q2 and
+%% q1 in that order, then stop.
+istate(start, State) -> {q4, State#vqstate.q4, State};
+istate(q4, State) -> {q3, State#vqstate.q3, State};
+istate(q3, State) -> {delta, State#vqstate.delta, State};
+istate(delta, State) -> {q2, State#vqstate.q2, State};
+istate(q2, State) -> {q1, State#vqstate.q1, State};
+istate(q1, _State) -> done.
+
+%% Advance an iterator (from msg_iterator/1 or the *_ack_iterator/1
+%% functions), yielding {value, MsgStatus, Unacked, NextIt,
+%% IndexState} or {empty, IndexState}. Delta entries are read from
+%% the queue index one segment boundary at a time, and seq-ids that
+%% are currently pending acknowledgement are skipped.
+next({ack, It}, IndexState) ->
+ case gb_trees:next(It) of
+ none -> {empty, IndexState};
+ {_SeqId, MsgStatus, It1} -> Next = {ack, It1},
+ {value, MsgStatus, true, Next, IndexState}
+ end;
+next(done, IndexState) -> {empty, IndexState};
+next({delta, #delta{start_seq_id = SeqId,
+ end_seq_id = SeqId}, State}, IndexState) ->
+ next(istate(delta, State), IndexState);
+next({delta, #delta{start_seq_id = SeqId,
+ end_seq_id = SeqIdEnd} = Delta, State}, IndexState) ->
+ SeqIdB = rabbit_queue_index:next_segment_boundary(SeqId),
+ SeqId1 = lists:min([SeqIdB, SeqIdEnd]),
+ {List, IndexState1} = rabbit_queue_index:read(SeqId, SeqId1, IndexState),
+ next({delta, Delta#delta{start_seq_id = SeqId1}, List, State}, IndexState1);
+next({delta, Delta, [], State}, IndexState) ->
+ next({delta, Delta, State}, IndexState);
+next({delta, Delta, [{_, SeqId, _, _, _} = M | Rest], State}, IndexState) ->
+ case is_msg_in_pending_acks(SeqId, State) of
+ false -> Next = {delta, Delta, Rest, State},
+ {value, beta_msg_status(M), false, Next, IndexState};
+ true -> next({delta, Delta, Rest, State}, IndexState)
+ end;
+next({Key, Q, State}, IndexState) ->
+ case ?QUEUE:out(Q) of
+ {empty, _Q} -> next(istate(Key, State), IndexState);
+ {{value, MsgStatus}, QN} -> Next = {Key, QN, State},
+ {value, MsgStatus, false, Next, IndexState}
+ end.
+
+%% Step iterator It once; when it yields a value, push the
+%% {MsgStatus, Unacked, NextIt} triple onto the Its list.
+inext(It, {Its, IndexState}) ->
+ case next(It, IndexState) of
+ {empty, IndexState1} ->
+ {Its, IndexState1};
+ {value, MsgStatus1, Unacked, It1, IndexState1} ->
+ {[{MsgStatus1, Unacked, It1} | Its], IndexState1}
+ end.
+
+%% Fold Fun over several iterators, always consuming the one whose
+%% head has the smallest seq id. Fun receives the message, its
+%% properties and the Unacked flag, and returns {cont, Acc} to keep
+%% going or {stop, Acc} to finish.
+ifold(_Fun, Acc, [], State0) ->
+ {Acc, State0};
+ifold(Fun, Acc, Its0, State0) ->
+ [{MsgStatus, Unacked, It} | Rest] =
+ lists:sort(fun ({#msg_status{seq_id = SeqId1}, _, _},
+ {#msg_status{seq_id = SeqId2}, _, _}) ->
+ SeqId1 =< SeqId2
+ end, Its0),
+ {Msg, State1} = read_msg(MsgStatus, State0),
+ case Fun(Msg, MsgStatus#msg_status.msg_props, Unacked, Acc) of
+ {stop, Acc1} ->
+ {Acc1, State1};
+ {cont, Acc1} ->
+ IndexState0 = State1#vqstate.index_state,
+ {Its1, IndexState1} = inext(It, {Rest, IndexState0}),
+ State2 = State1#vqstate{index_state = IndexState1},
+ ifold(Fun, Acc1, Its1, State2)
+ end.
+
+%%----------------------------------------------------------------------------
+%% Phase changes
+%%----------------------------------------------------------------------------
+
+%% Throttle reduce_memory_use/1: only run it once every
+%% ?EXPLICIT_GC_RUN_OP_THRESHOLD(Mode) invocations, counting the
+%% calls in between in memory_reduction_run_count.
+maybe_reduce_memory_use(State = #vqstate {memory_reduction_run_count = MRedRunCount,
+ mode = Mode}) ->
+ case MRedRunCount >= ?EXPLICIT_GC_RUN_OP_THRESHOLD(Mode) of
+ true -> State1 = reduce_memory_use(State),
+ State1#vqstate{memory_reduction_run_count = 0};
+ false -> State#vqstate{memory_reduction_run_count = MRedRunCount + 1}
+ end.
+
+%% Push message data out of RAM towards disk when over
+%% target_ram_count: convert alphas to betas and move RAM pending
+%% acks to disk (ordered by which side is growing faster), then push
+%% betas to deltas, each in chunks bounded by twice the credit-flow
+%% disc bound. May schedule a continuation via
+%% maybe_bump_reduce_memory_use/1 when a batch was truncated.
+reduce_memory_use(State = #vqstate { target_ram_count = infinity }) ->
+ State;
+reduce_memory_use(State = #vqstate {
+ mode = default,
+ ram_pending_ack = RPA,
+ ram_msg_count = RamMsgCount,
+ target_ram_count = TargetRamCount,
+ io_batch_size = IoBatchSize,
+ rates = #rates { in = AvgIngress,
+ out = AvgEgress,
+ ack_in = AvgAckIngress,
+ ack_out = AvgAckEgress } }) ->
+ {CreditDiscBound, _} =rabbit_misc:get_env(rabbit,
+ msg_store_credit_disc_bound,
+ ?CREDIT_DISC_BOUND),
+ {NeedResumeA2B, State1} = {_, #vqstate { q2 = Q2, q3 = Q3 }} =
+ case chunk_size(RamMsgCount + gb_trees:size(RPA), TargetRamCount) of
+ 0 -> {false, State};
+ %% Reduce memory of pending acks and alphas. The order is
+ %% determined based on which is growing faster. Whichever
+ %% comes second may very well get a quota of 0 if the
+ %% first manages to push out the max number of messages.
+ A2BChunk ->
+ %% In case there are few messages to be sent to a message store
+ %% and many messages to be embedded to the queue index,
+ %% we should limit the number of messages to be flushed
+ %% to avoid blocking the process.
+ A2BChunkActual = case A2BChunk > CreditDiscBound * 2 of
+ true -> CreditDiscBound * 2;
+ false -> A2BChunk
+ end,
+ Funs = case ((AvgAckIngress - AvgAckEgress) >
+ (AvgIngress - AvgEgress)) of
+ true -> [fun limit_ram_acks/2,
+ fun push_alphas_to_betas/2];
+ false -> [fun push_alphas_to_betas/2,
+ fun limit_ram_acks/2]
+ end,
+ {Quota, State2} = lists:foldl(fun (ReduceFun, {QuotaN, StateN}) ->
+ ReduceFun(QuotaN, StateN)
+ end, {A2BChunkActual, State}, Funs),
+ {(Quota == 0) andalso (A2BChunk > A2BChunkActual), State2}
+ end,
+ Permitted = permitted_beta_count(State1),
+ {NeedResumeB2D, State3} =
+ %% If there are more messages with their queue position held in RAM,
+ %% a.k.a. betas, in Q2 & Q3 than IoBatchSize,
+ %% write their queue position to disk, a.k.a. push_betas_to_deltas
+ case chunk_size(?QUEUE:len(Q2) + ?QUEUE:len(Q3),
+ Permitted) of
+ B2DChunk when B2DChunk >= IoBatchSize ->
+ %% Same as for alphas to betas. Limit a number of messages
+ %% to be flushed to disk at once to avoid blocking the process.
+ B2DChunkActual = case B2DChunk > CreditDiscBound * 2 of
+ true -> CreditDiscBound * 2;
+ false -> B2DChunk
+ end,
+ StateBD = push_betas_to_deltas(B2DChunkActual, State1),
+ {B2DChunk > B2DChunkActual, StateBD};
+ _ ->
+ {false, State1}
+ end,
+ %% We can be blocked by the credit flow, or limited by a batch size,
+ %% or finished with flushing.
+ %% If blocked by the credit flow - the credit grant will resume processing,
+ %% if limited by a batch - the batch continuation message should be sent.
+ %% The continuation message will be prioritised over publishes,
+ %% but not consumptions, so the queue can make progress.
+ Blocked = credit_flow:blocked(),
+ case {Blocked, NeedResumeA2B orelse NeedResumeB2D} of
+ %% Credit bump will continue paging
+ {true, _} -> State3;
+ %% Finished with paging
+ {false, false} -> State3;
+ %% Planning next batch
+ {false, true} ->
+ %% We don't want to use self-credit-flow, because it's harder to
+ %% reason about. So the process sends a (prioritised) message to
+ %% itself and sets a waiting_bump value to keep the message box clean
+ maybe_bump_reduce_memory_use(State3)
+ end;
+%% When using lazy queues, there are no alphas, so we don't need to
+%% call push_alphas_to_betas/2.
+reduce_memory_use(State = #vqstate {
+ mode = lazy,
+ ram_pending_ack = RPA,
+ ram_msg_count = RamMsgCount,
+ target_ram_count = TargetRamCount }) ->
+ State1 = #vqstate { q3 = Q3 } =
+ case chunk_size(RamMsgCount + gb_trees:size(RPA), TargetRamCount) of
+ 0 -> State;
+ S1 -> {_, State2} = limit_ram_acks(S1, State),
+ State2
+ end,
+
+ State3 =
+ case chunk_size(?QUEUE:len(Q3),
+ permitted_beta_count(State1)) of
+ 0 ->
+ State1;
+ S2 ->
+ push_betas_to_deltas(S2, State1)
+ end,
+ garbage_collect(),
+ State3.
+
+%% Send (at most one outstanding) bump_reduce_memory_use message to
+%% self; waiting_bump guards against flooding the mailbox.
+maybe_bump_reduce_memory_use(State = #vqstate{ waiting_bump = true }) ->
+ State;
+maybe_bump_reduce_memory_use(State) ->
+ self() ! bump_reduce_memory_use,
+ State#vqstate{ waiting_bump = true }.
+
+%% Move up to Quota pending acks from the RAM tree into the disk
+%% tree, largest seq id first, writing each message out (batched
+%% index writer) and trimming its in-RAM payload. Returns the unused
+%% quota. ui/1 is applied on every exit path -- defined elsewhere;
+%% NOTE(review): presumably flushes the batched index writes, confirm.
+limit_ram_acks(0, State) ->
+ {0, ui(State)};
+limit_ram_acks(Quota, State = #vqstate { ram_pending_ack = RPA,
+ disk_pending_ack = DPA }) ->
+ case gb_trees:is_empty(RPA) of
+ true ->
+ {Quota, ui(State)};
+ false ->
+ {SeqId, MsgStatus, RPA1} = gb_trees:take_largest(RPA),
+ {MsgStatus1, State1} =
+ maybe_prepare_write_to_disk(true, false, MsgStatus, State),
+ MsgStatus2 = m(trim_msg_status(MsgStatus1)),
+ DPA1 = gb_trees:insert(SeqId, MsgStatus2, DPA),
+ limit_ram_acks(Quota - 1,
+ stats({0, 0}, {MsgStatus, MsgStatus2}, 0,
+ State1 #vqstate { ram_pending_ack = RPA1,
+ disk_pending_ack = DPA1 }))
+ end.
+
+%% How many betas the queue may keep: unlimited for an empty queue,
+%% target_ram_count for lazy queues, at most one index segment when
+%% the RAM target is 0, and otherwise at least one segment's worth,
+%% tapered against the beta/delta population and the RAM target.
+permitted_beta_count(#vqstate { len = 0 }) ->
+ infinity;
+permitted_beta_count(#vqstate { mode = lazy,
+ target_ram_count = TargetRamCount}) ->
+ TargetRamCount;
+permitted_beta_count(#vqstate { target_ram_count = 0, q3 = Q3 }) ->
+ lists:min([?QUEUE:len(Q3), rabbit_queue_index:next_segment_boundary(0)]);
+permitted_beta_count(#vqstate { q1 = Q1,
+ q4 = Q4,
+ target_ram_count = TargetRamCount,
+ len = Len }) ->
+ BetaDelta = Len - ?QUEUE:len(Q1) - ?QUEUE:len(Q4),
+ lists:max([rabbit_queue_index:next_segment_boundary(0),
+ BetaDelta - ((BetaDelta * BetaDelta) div
+ (BetaDelta + TargetRamCount))]).
+
+%% Number of elements by which Current exceeds Permitted; zero when
+%% within the (possibly infinite) allowance.
+chunk_size(_Current, infinity) ->
+    0;
+chunk_size(Current, Permitted) when Current =< Permitted ->
+    0;
+chunk_size(Current, Permitted) ->
+    Current - Permitted.
+
+%% Take the next message from q3, returning {loaded, {MsgStatus,
+%% State}} or {empty, State}. In default mode, emptying q3 triggers
+%% the bookkeeping needed to keep the q1..q4/delta invariants: with
+%% an empty delta q1 collapses into q4, otherwise more deltas are
+%% paged into q3. In lazy mode q3 is simply refilled from delta on
+%% demand.
+fetch_from_q3(State = #vqstate { mode = default,
+ q1 = Q1,
+ q2 = Q2,
+ delta = #delta { count = DeltaCount },
+ q3 = Q3,
+ q4 = Q4 }) ->
+ case ?QUEUE:out(Q3) of
+ {empty, _Q3} ->
+ {empty, State};
+ {{value, MsgStatus}, Q3a} ->
+ State1 = State #vqstate { q3 = Q3a },
+ State2 = case {?QUEUE:is_empty(Q3a), 0 == DeltaCount} of
+ {true, true} ->
+ %% q3 is now empty, it wasn't before;
+ %% delta is still empty. So q2 must be
+ %% empty, and we know q4 is empty
+ %% otherwise we wouldn't be loading from
+ %% q3. As such, we can just set q4 to Q1.
+ true = ?QUEUE:is_empty(Q2), %% ASSERTION
+ true = ?QUEUE:is_empty(Q4), %% ASSERTION
+ State1 #vqstate { q1 = ?QUEUE:new(), q4 = Q1 };
+ {true, false} ->
+ maybe_deltas_to_betas(State1);
+ {false, _} ->
+ %% q3 still isn't empty, we've not
+ %% touched delta, so the invariants
+ %% between q1, q2, delta and q3 are
+ %% maintained
+ State1
+ end,
+ {loaded, {MsgStatus, State2}}
+ end;
+%% lazy queues
+fetch_from_q3(State = #vqstate { mode = lazy,
+ delta = #delta { count = DeltaCount },
+ q3 = Q3 }) ->
+ case ?QUEUE:out(Q3) of
+ {empty, _Q3} when DeltaCount =:= 0 ->
+ {empty, State};
+ {empty, _Q3} ->
+ fetch_from_q3(maybe_deltas_to_betas(State));
+ {{value, MsgStatus}, Q3a} ->
+ State1 = State #vqstate { q3 = Q3a },
+ {loaded, {MsgStatus, State1}}
+ end.
+
+%% Convenience wrapper: page deltas into betas using the
+%% deliver_and_ack continuation (records delivers/acks in the index).
+maybe_deltas_to_betas(State) ->
+ AfterFun = process_delivers_and_acks_fun(deliver_and_ack),
+ maybe_deltas_to_betas(AfterFun, State).
+
+%% Page one index segment's worth of delta messages into q3 as betas.
+%% No-op for a blank delta. If every entry in the segment was skipped
+%% (transient messages below transient_threshold), recurse onto the
+%% next segment. When the load empties delta entirely, q2 is joined
+%% onto q3; otherwise delta is shrunk accordingly.
+maybe_deltas_to_betas(_DelsAndAcksFun,
+ State = #vqstate {delta = ?BLANK_DELTA_PATTERN(X) }) ->
+ State;
+maybe_deltas_to_betas(DelsAndAcksFun,
+ State = #vqstate {
+ q2 = Q2,
+ delta = Delta,
+ q3 = Q3,
+ index_state = IndexState,
+ ram_msg_count = RamMsgCount,
+ ram_bytes = RamBytes,
+ disk_read_count = DiskReadCount,
+ delta_transient_bytes = DeltaTransientBytes,
+ transient_threshold = TransientThreshold }) ->
+ #delta { start_seq_id = DeltaSeqId,
+ count = DeltaCount,
+ transient = Transient,
+ end_seq_id = DeltaSeqIdEnd } = Delta,
+ DeltaSeqId1 =
+ lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId),
+ DeltaSeqIdEnd]),
+ {List, IndexState1} = rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1,
+ IndexState),
+ {Q3a, RamCountsInc, RamBytesInc, State1, TransientCount, TransientBytes} =
+ betas_from_index_entries(List, TransientThreshold,
+ DelsAndAcksFun,
+ State #vqstate { index_state = IndexState1 }),
+ State2 = State1 #vqstate { ram_msg_count = RamMsgCount + RamCountsInc,
+ ram_bytes = RamBytes + RamBytesInc,
+ disk_read_count = DiskReadCount + RamCountsInc },
+ case ?QUEUE:len(Q3a) of
+ 0 ->
+ %% we ignored every message in the segment due to it being
+ %% transient and below the threshold
+ maybe_deltas_to_betas(
+ DelsAndAcksFun,
+ State2 #vqstate {
+ delta = d(Delta #delta { start_seq_id = DeltaSeqId1 })});
+ Q3aLen ->
+ Q3b = ?QUEUE:join(Q3, Q3a),
+ case DeltaCount - Q3aLen of
+ 0 ->
+ %% delta is now empty, but it wasn't before, so
+ %% can now join q2 onto q3
+ State2 #vqstate { q2 = ?QUEUE:new(),
+ delta = ?BLANK_DELTA,
+ q3 = ?QUEUE:join(Q3b, Q2),
+ delta_transient_bytes = 0};
+ N when N > 0 ->
+ Delta1 = d(#delta { start_seq_id = DeltaSeqId1,
+ count = N,
+ transient = Transient - TransientCount,
+ end_seq_id = DeltaSeqIdEnd }),
+ State2 #vqstate { delta = Delta1,
+ q3 = Q3b,
+ delta_transient_bytes = DeltaTransientBytes - TransientBytes }
+ end
+ end.
+
+%% Convert up to Quota alphas into betas, from both RAM queues: the
+%% head of q1 goes to q3 when delta is empty (otherwise to q2), and
+%% the tail of q4 is prepended to q3. Returns the unused quota.
+push_alphas_to_betas(Quota, State) ->
+ {Quota1, State1} =
+ push_alphas_to_betas(
+ fun ?QUEUE:out/1,
+ fun (MsgStatus, Q1a,
+ State0 = #vqstate { q3 = Q3, delta = #delta { count = 0,
+ transient = 0 } }) ->
+ State0 #vqstate { q1 = Q1a, q3 = ?QUEUE:in(MsgStatus, Q3) };
+ (MsgStatus, Q1a, State0 = #vqstate { q2 = Q2 }) ->
+ State0 #vqstate { q1 = Q1a, q2 = ?QUEUE:in(MsgStatus, Q2) }
+ end, Quota, State #vqstate.q1, State),
+ {Quota2, State2} =
+ push_alphas_to_betas(
+ fun ?QUEUE:out_r/1,
+ fun (MsgStatus, Q4a, State0 = #vqstate { q3 = Q3 }) ->
+ State0 #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3), q4 = Q4a }
+ end, Quota1, State1 #vqstate.q4, State1),
+ {Quota2, State2}.
+
+push_alphas_to_betas(_Generator, _Consumer, Quota, _Q,
+ State = #vqstate { ram_msg_count = RamMsgCount,
+ target_ram_count = TargetRamCount })
+ when Quota =:= 0 orelse
+ TargetRamCount =:= infinity orelse
+ TargetRamCount >= RamMsgCount ->
+ {Quota, ui(State)};
+push_alphas_to_betas(Generator, Consumer, Quota, Q, State) ->
+ %% We consume credits from the message_store whenever we need to
+ %% persist a message to disk. See:
+ %% rabbit_variable_queue:msg_store_write/4. So perhaps the
+ %% msg_store is trying to throttle down our queue.
+ case credit_flow:blocked() of
+ true -> {Quota, ui(State)};
+ false -> case Generator(Q) of
+ {empty, _Q} ->
+ {Quota, ui(State)};
+ {{value, MsgStatus}, Qa} ->
+ {MsgStatus1, State1} =
+ maybe_prepare_write_to_disk(true, false, MsgStatus,
+ State),
+ MsgStatus2 = m(trim_msg_status(MsgStatus1)),
+ State2 = stats(
+ ready0, {MsgStatus, MsgStatus2}, 0, State1),
+ State3 = Consumer(MsgStatus2, Qa, State2),
+ push_alphas_to_betas(Generator, Consumer, Quota - 1,
+ Qa, State3)
+ end
+ end.
+
+%% Page beta messages out of RAM entirely, extending the on-disk delta.
+%% In default mode q3 is drained from its rear (down to the next queue
+%% index segment boundary) and q2 from its front; the results replace
+%% q2, delta and q3 in the state.
+push_betas_to_deltas(Quota, State = #vqstate { mode  = default,
+                                               q2    = Q2,
+                                               delta = Delta,
+                                               q3    = Q3}) ->
+    PushState = {Quota, Delta, State},
+    {Q3a, PushState1} = push_betas_to_deltas(
+                          fun ?QUEUE:out_r/1,
+                          fun rabbit_queue_index:next_segment_boundary/1,
+                          Q3, PushState),
+    {Q2a, PushState2} = push_betas_to_deltas(
+                          fun ?QUEUE:out/1,
+                          fun (Q2MinSeqId) -> Q2MinSeqId end,
+                          Q2, PushState1),
+    {_, Delta1, State1} = PushState2,
+    State1 #vqstate { q2    = Q2a,
+                      delta = Delta1,
+                      q3    = Q3a };
+%% In the case of lazy queues we want to page as many messages as
+%% possible from q3.
+push_betas_to_deltas(Quota, State = #vqstate { mode  = lazy,
+                                               delta = Delta,
+                                               q3    = Q3}) ->
+    PushState = {Quota, Delta, State},
+    {Q3a, PushState1} = push_betas_to_deltas(
+                          fun ?QUEUE:out_r/1,
+                          fun (Q2MinSeqId) -> Q2MinSeqId end,
+                          Q3, PushState),
+    {_, Delta1, State1} = PushState1,
+    State1 #vqstate { delta = Delta1,
+                      q3    = Q3a }.
+
+
+%% Guard around the paging loop: do nothing when Q is empty, or when
+%% the whole of Q lies below the boundary computed by LimitFun from the
+%% front seq id (i.e. there is nothing past the limit worth pushing).
+push_betas_to_deltas(Generator, LimitFun, Q, PushState) ->
+    case ?QUEUE:is_empty(Q) of
+        true ->
+            {Q, PushState};
+        false ->
+            {value, #msg_status { seq_id = MinSeqId }} = ?QUEUE:peek(Q),
+            {value, #msg_status { seq_id = MaxSeqId }} = ?QUEUE:peek_r(Q),
+            Limit = LimitFun(MinSeqId),
+            case MaxSeqId < Limit of
+                true  -> {Q, PushState};
+                false -> push_betas_to_deltas1(Generator, Limit, Q, PushState)
+            end
+    end.
+
+%% Quota-limited paging loop: batch-write each popped message's index
+%% entry to disk and fold its seq id into the delta via expand_delta/3.
+%% Terminates when the quota hits zero, the queue empties, or the next
+%% message is below Limit; ui/1 flushes index caches at every exit.
+push_betas_to_deltas1(_Generator, _Limit, Q, {0, Delta, State}) ->
+    {Q, {0, Delta, ui(State)}};
+push_betas_to_deltas1(Generator, Limit, Q, {Quota, Delta, State}) ->
+    case Generator(Q) of
+        {empty, _Q} ->
+            {Q, {Quota, Delta, ui(State)}};
+        {{value, #msg_status { seq_id = SeqId }}, _Qa}
+          when SeqId < Limit ->
+            {Q, {Quota, Delta, ui(State)}};
+        {{value, MsgStatus = #msg_status { seq_id = SeqId }}, Qa} ->
+            {#msg_status { index_on_disk = true,
+                           is_persistent = IsPersistent }, State1} =
+                maybe_batch_write_index_to_disk(true, MsgStatus, State),
+            State2 = stats(ready0, {MsgStatus, none}, 1, State1),
+            Delta1 = expand_delta(SeqId, Delta, IsPersistent),
+            push_betas_to_deltas1(Generator, Limit, Qa,
+                                  {Quota - 1, Delta1, State2})
+    end.
+
+%% Flushes queue index batch caches and updates queue index state.
+%% Called at every exit point of the paging loops above so no
+%% pre-publish entries are left buffered.
+ui(#vqstate{index_state      = IndexState,
+            target_ram_count = TargetRamCount} = State) ->
+    IndexState1 = rabbit_queue_index:flush_pre_publish_cache(
+                    TargetRamCount, IndexState),
+    State#vqstate{index_state = IndexState1}.
+
+%%----------------------------------------------------------------------------
+%% Upgrading
+%%----------------------------------------------------------------------------
+
+-spec multiple_routing_keys() -> 'ok'.
+
+%% Upgrade step: rewrite every stored basic_message so its single
+%% routing key becomes a one-element list of routing keys.  Any term
+%% that is not a 6-tuple basic_message is reported as corrupt.
+multiple_routing_keys() ->
+    transform_storage(
+      fun ({basic_message, ExchangeName, Routing_Key, Content,
+            MsgId, Persistent}) ->
+              {ok, {basic_message, ExchangeName, [Routing_Key], Content,
+                    MsgId, Persistent}};
+          (_) -> {error, corrupt_message}
+      end),
+    ok.
+
+
+%% Assumes message store is not running
+%% Applies TransformFun to both the persistent and the transient store.
+transform_storage(TransformFun) ->
+    transform_store(?PERSISTENT_MSG_STORE, TransformFun),
+    transform_store(?TRANSIENT_MSG_STORE, TransformFun).
+
+%% Force-recover one store's directory, then rewrite its messages.
+transform_store(Store, TransformFun) ->
+    rabbit_msg_store:force_recovery(rabbit_mnesia:dir(), Store),
+    rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun).
+
+%% Upgrade entry point (3.6 -> 3.7): migrate the legacy global message
+%% store into per-vhost stores, then delete the old store directories
+%% and the global recovery terms.  A no-op (bar the cleanup) when no
+%% durable queues exist.
+move_messages_to_vhost_store() ->
+    case list_persistent_queues() of
+        [] ->
+            log_upgrade("No durable queues found."
+                        " Skipping message store migration"),
+            ok;
+        Queues ->
+            move_messages_to_vhost_store(Queues)
+    end,
+    ok = delete_old_store(),
+    ok = rabbit_queue_index:cleanup_global_recovery_terms().
+
+move_messages_to_vhost_store(Queues) ->
+    log_upgrade("Moving messages to per-vhost message store"),
+    %% Move the queue index for each persistent queue to the new store
+    lists:foreach(
+        fun(Queue) ->
+            QueueName = amqqueue:get_name(Queue),
+            rabbit_queue_index:move_to_per_vhost_stores(QueueName)
+        end,
+        Queues),
+    %% Legacy (global) msg_store may require recovery.
+    %% This upgrade step should only be started
+    %% if we are upgrading from a pre-3.7.0 version.
+    {QueuesWithTerms, RecoveryRefs, StartFunState} = read_old_recovery_terms(Queues),
+
+    OldStore = run_old_persistent_store(RecoveryRefs, StartFunState),
+
+    VHosts = rabbit_vhost:list_names(),
+
+    %% New store should not be recovered.
+    NewMsgStore = start_new_store(VHosts),
+    %% Recovery terms should be started for all vhosts for new store.
+    [ok = rabbit_recovery_terms:open_table(VHost) || VHost <- VHosts],
+
+    MigrationBatchSize = application:get_env(rabbit, queue_migration_batch_size,
+                                             ?QUEUE_MIGRATION_BATCH_SIZE),
+    %% Queues are migrated in parallel, MigrationBatchSize at a time
+    %% (see in_batches/5 and migrate_queue/3).
+    in_batches(MigrationBatchSize,
+               {rabbit_variable_queue, migrate_queue, [OldStore, NewMsgStore]},
+               QueuesWithTerms,
+               "message_store upgrades: Migrating batch ~p of ~p queues. Out of total ~p ~n",
+               "message_store upgrades: Batch ~p of ~p queues migrated ~n. ~p total left"),
+
+    log_upgrade("Message store migration finished"),
+    ok = rabbit_sup:stop_child(OldStore),
+    [ok= rabbit_recovery_terms:close_table(VHost) || VHost <- VHosts],
+    ok = stop_new_store(NewMsgStore).
+
+%% Apply {M, F, A} to every element of Items, Size elements at a time.
+%% StartFmt is logged before and EndFmt after each batch, with
+%% [BatchNum, Size, Remaining] / [BatchNum, Size, Left] arguments.  The
+%% calls within one batch run concurrently via rpc:async_call/4 on the
+%% local node; a {badrpc, Reason} result is re-thrown as Reason.
+in_batches(Size, MFA, Items, StartFmt, EndFmt) ->
+    in_batches(Size, 1, MFA, Items, StartFmt, EndFmt).
+
+in_batches(_Size, _BatchNum, _MFA, [], _StartFmt, _EndFmt) ->
+    ok;
+in_batches(Size, BatchNum, {M, F, A} = MFA, Items, StartFmt, EndFmt) ->
+    Remaining = length(Items),
+    {Batch, Rest} =
+        if
+            Size > Remaining -> {Items, []};
+            true             -> lists:split(Size, Items)
+        end,
+    AlreadyDone = (BatchNum - 1) * Size,
+    rabbit_log:info(StartFmt, [BatchNum, Size, AlreadyDone + Remaining]),
+    Futures = [rpc:async_call(node(), M, F, [Item | A]) || Item <- Batch],
+    [case rpc:yield(Future) of
+         {badrpc, Reason} -> throw(Reason);
+         _Result          -> ok
+     end || Future <- Futures],
+    rabbit_log:info(EndFmt, [BatchNum, Size, length(Rest)]),
+    in_batches(Size, BatchNum + 1, MFA, Rest, StartFmt, EndFmt).
+
+%% Migrate one queue's persistent, unacked messages from the old global
+%% store to the new per-vhost store by scanning its index segments,
+%% then patch the queue's recovery terms with the new client ref so the
+%% next recovery uses the per-vhost store.  Returns {QueueName, Ref}.
+migrate_queue({QueueName = #resource{virtual_host = VHost, name = Name},
+               RecoveryTerm},
+              OldStore, NewStore) ->
+    log_upgrade_verbose(
+      "Migrating messages in queue ~s in vhost ~s to per-vhost message store~n",
+      [Name, VHost]),
+    OldStoreClient = get_global_store_client(OldStore),
+    NewStoreClient = get_per_vhost_store_client(QueueName, NewStore),
+    %% WARNING: During scan_queue_segments queue index state is being recovered
+    %% and terminated. This can cause side effects!
+    rabbit_queue_index:scan_queue_segments(
+      %% We migrate only persistent messages which are found in message store
+      %% and are not acked yet
+      fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, OldC)
+          when is_binary(MsgId) ->
+              migrate_message(MsgId, OldC, NewStoreClient);
+          (_SeqId, _MsgId, _MsgProps,
+           _IsPersistent, _IsDelivered, _IsAcked, OldC) ->
+              OldC
+      end,
+      OldStoreClient,
+      QueueName),
+    rabbit_msg_store:client_terminate(OldStoreClient),
+    rabbit_msg_store:client_terminate(NewStoreClient),
+    NewClientRef = rabbit_msg_store:client_ref(NewStoreClient),
+    case RecoveryTerm of
+        non_clean_shutdown -> ok;
+        Term when is_list(Term) ->
+            NewRecoveryTerm = lists:keyreplace(persistent_ref, 1, RecoveryTerm,
+                                               {persistent_ref, NewClientRef}),
+            rabbit_queue_index:update_recovery_term(QueueName, NewRecoveryTerm)
+    end,
+    log_upgrade_verbose("Finished migrating queue ~s in vhost ~s", [Name, VHost]),
+    {QueueName, NewClientRef}.
+
+%% Copy one message from the old (global) store client to the new
+%% per-vhost store client.  Returns the possibly-updated old-store
+%% client state; a message that cannot be read is skipped and the
+%% original client state is returned unchanged.
+migrate_message(MsgId, OldClient, NewClient) ->
+    ReadResult = rabbit_msg_store:read(MsgId, OldClient),
+    case ReadResult of
+        {{ok, MsgBody}, OldClient1} ->
+            ok = rabbit_msg_store:write(MsgId, MsgBody, NewClient),
+            OldClient1;
+        _Other ->
+            OldClient
+    end.
+
+%% Open a throwaway client (fresh guid ref, no-op callbacks) against
+%% the migrated queue's per-vhost store, found by vhost in the
+%% {VHost, Pid} list produced by start_new_store/1.
+get_per_vhost_store_client(#resource{virtual_host = VHost}, NewStore) ->
+    {VHost, StorePid} = lists:keyfind(VHost, 1, NewStore),
+    rabbit_msg_store:client_init(StorePid, rabbit_guid:gen(),
+                                 fun(_,_) -> ok end, fun() -> ok end).
+
+%% Same, but against the legacy global store process.
+get_global_store_client(OldStore) ->
+    rabbit_msg_store:client_init(OldStore,
+                                 rabbit_guid:gen(),
+                                 fun(_,_) -> ok end,
+                                 fun() -> ok end).
+
+%% Durable classic queues homed on this node that have no entry in
+%% rabbit_queue (i.e. are not currently alive/recovered) -- the set of
+%% queues whose messages need migrating.
+list_persistent_queues() ->
+    Node = node(),
+    mnesia:async_dirty(
+      fun () ->
+              qlc:e(qlc:q([Q || Q <- mnesia:table(rabbit_durable_queue),
+                                ?amqqueue_is_classic(Q),
+                                amqqueue:qnode(Q) == Node,
+                                mnesia:read(rabbit_queue, amqqueue:get_name(Q), read) =:= []]))
+      end).
+
+%% Read the global recovery terms for the given queues.  Returns
+%% {queue-name/terms pairs, persistent client refs, start-fun state}.
+%% Note the begin/end filter in the comprehension: it binds Ref from
+%% the terms and keeps it only when a persistent_ref is present.
+read_old_recovery_terms([]) ->
+    {[], [], ?EMPTY_START_FUN_STATE};
+read_old_recovery_terms(Queues) ->
+    QueueNames = [amqqueue:get_name(Q) || Q <- Queues],
+    {AllTerms, StartFunState} = rabbit_queue_index:read_global_recovery_terms(QueueNames),
+    Refs = [Ref || Terms <- AllTerms,
+                   Terms /= non_clean_shutdown,
+                   begin
+                       Ref = proplists:get_value(persistent_ref, Terms),
+                       Ref =/= undefined
+                   end],
+    {lists:zip(QueueNames, AllTerms), Refs, StartFunState}.
+
+%% Start the legacy global persistent store under rabbit_sup with the
+%% given recovery refs, returning its registered child name.
+run_old_persistent_store(Refs, StartFunState) ->
+    OldStoreName = ?PERSISTENT_MSG_STORE,
+    ok = rabbit_sup:start_child(OldStoreName, rabbit_msg_store, start_global_store_link,
+                                [OldStoreName, rabbit_mnesia:dir(),
+                                 Refs, StartFunState]),
+    OldStoreName.
+
+%% Start one fresh (unrecovered: Refs = undefined) persistent store per
+%% vhost, each rooted in the vhost's message-store directory.  Returns
+%% [{VHost, Pid}] consumed by get_per_vhost_store_client/2 and
+%% stop_new_store/1.
+start_new_store(VHosts) ->
+    %% Ensure vhost supervisor is started, so we can add vhosts to it.
+    lists:map(fun(VHost) ->
+                      VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+                      {ok, Pid} = rabbit_msg_store:start_link(?PERSISTENT_MSG_STORE,
+                                                              VHostDir,
+                                                              undefined,
+                                                              ?EMPTY_START_FUN_STATE),
+                      {VHost, Pid}
+              end,
+              VHosts).
+
+%% Shut down the per-vhost store processes started by start_new_store/1.
+%% Each pid is unlinked first, so the caller receives no exit signal,
+%% then sent a 'shutdown' exit.
+stop_new_store(Stores) ->
+    [begin
+         true = unlink(StorePid),
+         exit(StorePid, shutdown)
+     end || {_VHost, StorePid} <- Stores],
+    ok.
+
+%% Remove the legacy global store directories (persistent and
+%% transient) from the node's data dir.
+%% NOTE(review): the recursive_delete return values are ignored here --
+%% presumably best-effort cleanup is intended; confirm.
+delete_old_store() ->
+    log_upgrade("Removing the old message store data"),
+    rabbit_file:recursive_delete(
+      [filename:join([rabbit_mnesia:dir(), ?PERSISTENT_MSG_STORE])]),
+    %% Delete old transient store as well
+    rabbit_file:recursive_delete(
+      [filename:join([rabbit_mnesia:dir(), ?TRANSIENT_MSG_STORE])]),
+    ok.
+
+%% Info-level upgrade logging, prefixed "message_store upgrades: ".
+log_upgrade(Msg) ->
+    log_upgrade(Msg, []).
+
+log_upgrade(Msg, Args) ->
+    rabbit_log:info("message_store upgrades: " ++ Msg, Args).
+
+%% Per-queue progress logging, routed to the upgrade log.
+log_upgrade_verbose(Msg) ->
+    log_upgrade_verbose(Msg, []).
+
+log_upgrade_verbose(Msg, Args) ->
+    rabbit_log_upgrade:info(Msg, Args).
+
+maybe_client_terminate(MSCStateP) ->
+    %% Queue might have been asked to stop by the supervisor, it needs a clean
+    %% shutdown in order for the supervising strategy to work - if it reaches max
+    %% restarts might bring the vhost down.
+    %% The catch-all is deliberate: a failed terminate must not escalate.
+    try
+        rabbit_msg_store:client_terminate(MSCStateP)
+    catch
+        _:_ ->
+            ok
+    end.
diff --git a/deps/rabbit/src/rabbit_version.erl b/deps/rabbit/src/rabbit_version.erl
new file mode 100644
index 0000000000..3f5462c7b4
--- /dev/null
+++ b/deps/rabbit/src/rabbit_version.erl
@@ -0,0 +1,227 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_version).
+
+-export([recorded/0, matches/2, desired/0, desired_for_scope/1,
+ record_desired/0, record_desired_for_scope/1,
+ upgrades_required/1, all_upgrades_required/1,
+ check_version_consistency/3,
+ check_version_consistency/4, check_otp_consistency/1,
+ version_error/3]).
+
+%% -------------------------------------------------------------------
+
+-export_type([scope/0, step/0]).
+
+-type scope() :: atom().
+-type scope_version() :: [atom()].
+-type step() :: {atom(), atom()}.
+
+-type version() :: [atom()].
+
+%% -------------------------------------------------------------------
+
+-define(VERSION_FILENAME, "schema_version").
+-define(SCOPES, [mnesia, local]).
+
+%% -------------------------------------------------------------------
+
+-spec recorded() -> rabbit_types:ok_or_error2(version(), any()).
+
+%% Read the flat list of applied upgrade-step names from the
+%% schema_version file in the node's data dir.
+recorded() -> case rabbit_file:read_term_file(schema_filename()) of
+                  {ok, [V]}        -> {ok, V};
+                  {error, _} = Err -> Err
+              end.
+
+%% Persist the flat step-name list back to the schema_version file.
+record(V) -> ok = rabbit_file:write_term_file(schema_filename(), [V]).
+
+%% Return {ok, StepNames} recorded under Scope, or the error from
+%% reading the schema version file.  A scope with nothing recorded
+%% yields {ok, []}.
+recorded_for_scope(Scope) ->
+    case recorded() of
+        {error, _} = Err ->
+            Err;
+        {ok, Version} ->
+            ScopeVersion =
+                case lists:keyfind(Scope, 1, categorise_by_scope(Version)) of
+                    false       -> [];
+                    {Scope, SV} -> SV
+                end,
+            {ok, ScopeVersion}
+    end.
+
+%% Replace one scope's recorded steps, then flatten all scopes back
+%% into the single name list stored on disk by record/1.
+record_for_scope(Scope, ScopeVersion) ->
+    case recorded() of
+        {error, _} = Err ->
+            Err;
+        {ok, Version} ->
+            Version1 = lists:keystore(Scope, 1, categorise_by_scope(Version),
+                                      {Scope, ScopeVersion}),
+            ok = record([Name || {_Scope, Names} <- Version1, Name <- Names])
+    end.
+
+%% -------------------------------------------------------------------
+
+-spec matches([A], [A]) -> boolean().
+
+%% Two versions match when they contain the same set of step names,
+%% irrespective of order or duplicates.
+matches(VerA, VerB) ->
+    Canonical = fun (Ver) -> lists:usort(Ver) end,
+    Canonical(VerA) =:= Canonical(VerB).
+
+%% -------------------------------------------------------------------
+
+-spec desired() -> version().
+
+%% All upgrade-step names a fully up-to-date node would have applied,
+%% across every scope.
+desired() -> [Name || Scope <- ?SCOPES, Name <- desired_for_scope(Scope)].
+
+-spec desired_for_scope(scope()) -> scope_version().
+
+%% The heads (steps nothing depends on) of one scope's upgrade graph.
+desired_for_scope(Scope) -> with_upgrade_graph(fun heads/1, Scope).
+
+-spec record_desired() -> 'ok'.
+
+%% Mark this node as fully upgraded in the schema_version file.
+record_desired() -> record(desired()).
+
+-spec record_desired_for_scope
+        (scope()) -> rabbit_types:ok_or_error(any()).
+
+%% Mark a single scope as fully upgraded.
+record_desired_for_scope(Scope) ->
+    record_for_scope(Scope, desired_for_scope(Scope)).
+
+-spec upgrades_required
+        (scope()) -> rabbit_types:ok_or_error2([step()], any()).
+
+%% Compute the {Module, StepName} steps still to apply for one scope.
+%% Distinguishes a brand-new node (no guid file yet) from a node whose
+%% version file is merely missing, and rejects version files written by
+%% a future release (unknown heads).
+upgrades_required(Scope) ->
+    case recorded_for_scope(Scope) of
+        {error, enoent} ->
+            case filelib:is_file(rabbit_guid:filename()) of
+                false -> {error, starting_from_scratch};
+                true  -> {error, version_not_available}
+            end;
+        {ok, CurrentHeads} ->
+            with_upgrade_graph(
+              fun (G) ->
+                      case unknown_heads(CurrentHeads, G) of
+                          []      -> {ok, upgrades_to_apply(CurrentHeads, G)};
+                          Unknown -> {error, {future_upgrades_found, Unknown}}
+                      end
+              end, Scope)
+    end.
+
+%% Fold upgrades_required/1 over the given scopes, returning
+%% {ok, [{Scope, Steps}]} with fully-upgraded scopes filtered out, or
+%% the first error encountered.
+all_upgrades_required(Scopes) ->
+    case recorded() of
+        {error, enoent} ->
+            case filelib:is_file(rabbit_guid:filename()) of
+                false -> {error, starting_from_scratch};
+                true  -> {error, version_not_available}
+            end;
+        {ok, _} ->
+            lists:foldl(
+                fun
+                (_, {error, Err}) -> {error, Err};
+                (Scope, {ok, Acc}) ->
+                    case upgrades_required(Scope) of
+                        %% Lift errors from any scope.
+                        {error, Err} -> {error, Err};
+                        %% Filter non-upgradable scopes
+                        {ok, []} -> {ok, Acc};
+                        {ok, Upgrades} -> {ok, [{Scope, Upgrades} | Acc]}
+                    end
+                end,
+                {ok, []},
+                Scopes)
+    end.
+
+%% -------------------------------------------------------------------
+
+%% Build the acyclic upgrade-step digraph for Scope from all modules'
+%% -rabbit_upgrade attributes, apply Fun to it, and always delete the
+%% graph afterwards (digraphs are ETS-backed and not garbage
+%% collected).  Graph construction problems are re-thrown as
+%% domain-specific errors.
+with_upgrade_graph(Fun, Scope) ->
+    case rabbit_misc:build_acyclic_graph(
+           fun ({_App, Module, Steps}) -> vertices(Module, Steps, Scope) end,
+           fun ({_App, Module, Steps}) -> edges(Module, Steps, Scope) end,
+           rabbit_misc:all_module_attributes(rabbit_upgrade)) of
+        {ok, G} -> try
+                       Fun(G)
+                   after
+                       true = digraph:delete(G)
+                   end;
+        {error, {vertex, duplicate, StepName}} ->
+            throw({error, {duplicate_upgrade_step, StepName}});
+        {error, {edge, {bad_vertex, StepName}, _From, _To}} ->
+            throw({error, {dependency_on_unknown_upgrade_step, StepName}});
+        {error, {edge, {bad_edge, StepNames}, _From, _To}} ->
+            throw({error, {cycle_in_upgrade_steps, StepNames}})
+    end.
+
+%% Graph vertices for one module's upgrade steps: every step declared
+%% in the requested scope becomes {StepName, {Module, StepName}}.
+vertices(Module, Steps, Scope) ->
+    lists:filtermap(
+      fun ({StepName, StepScope, _Requires}) when StepScope == Scope ->
+              {true, {StepName, {Module, StepName}}};
+          (_) ->
+              false
+      end, Steps).
+
+%% Graph edges: one {Requirement, StepName} edge per dependency of
+%% every step declared in the requested scope.
+edges(_Module, Steps, Scope) ->
+    lists:append(
+      [[{Require, StepName} || Require <- Requires]
+       || {StepName, StepScope, Requires} <- Steps, StepScope == Scope]).
+%% Recorded heads that are not vertices of the upgrade graph -- steps
+%% applied by a version of the code we do not know about.
+unknown_heads(Heads, G) ->
+    lists:filter(fun (Head) -> digraph:vertex(G, Head) =:= false end, Heads).
+
+%% The {Module, StepName} labels of all steps not yet applied, in a
+%% dependency-respecting order.
+upgrades_to_apply(Heads, G) ->
+    %% Take all the vertices which can reach the known heads. That's
+    %% everything we've already applied. Subtract that from all
+    %% vertices: that's what we have to apply.
+    Unsorted = sets:to_list(
+                sets:subtract(
+                  sets:from_list(digraph:vertices(G)),
+                  sets:from_list(digraph_utils:reaching(Heads, G)))),
+    %% Form a subgraph from that list and find a topological ordering
+    %% so we can invoke them in order.
+    [element(2, digraph:vertex(G, StepName)) ||
+        StepName <- digraph_utils:topsort(digraph_utils:subgraph(G, Unsorted))].
+
+%% The "heads" of the upgrade graph: steps no other step depends on
+%% (out-degree zero), sorted for stable comparison against the
+%% recorded version.
+heads(G) ->
+    Sinks = lists:filter(fun (V) -> digraph:out_degree(G, V) =:= 0 end,
+                         digraph:vertices(G)),
+    lists:sort(Sinks).
+
+%% -------------------------------------------------------------------
+
+%% Group a flat list of applied step names by scope, by matching them
+%% against the -rabbit_upgrade attributes of all loaded modules.
+%% Returns [{Scope, [Name]}].
+categorise_by_scope(Version) when is_list(Version) ->
+    Categorised =
+        [{Scope, Name} || {_App, _Module, Attributes} <-
+                              rabbit_misc:all_module_attributes(rabbit_upgrade),
+                          {Name, Scope, _Requires} <- Attributes,
+                          lists:member(Name, Version)],
+    maps:to_list(
+      lists:foldl(fun ({Scope, Name}, CatVersion) ->
+                          rabbit_misc:maps_cons(Scope, Name, CatVersion)
+                  end, maps:new(), Categorised)).
+
+dir() -> rabbit_mnesia:dir().
+
+%% Full path of the schema_version file in the node's data dir.
+schema_filename() -> filename:join(dir(), ?VERSION_FILENAME).
+
+%% --------------------------------------------------------------------
+
+-spec check_version_consistency
+        (string(), string(), string()) -> rabbit_types:ok_or_error(any()).
+
+%% Compare a local and a remote version string for exact equality;
+%% Name labels the thing being compared in the error message.
+check_version_consistency(This, Remote, Name) ->
+    check_version_consistency(This, Remote, Name, fun (A, B) -> A =:= B end).
+
+-spec check_version_consistency
+        (string(), string(), string(),
+         fun((string(), string()) -> boolean())) ->
+    rabbit_types:ok_or_error(any()).
+
+%% As above, but with a caller-supplied comparison predicate.
+check_version_consistency(This, Remote, Name, Comp) ->
+    case Comp(This, Remote) of
+        true  -> ok;
+        false -> version_error(Name, This, Remote)
+    end.
+
+%% Build the {error, {inconsistent_cluster, Msg}} mismatch report.
+version_error(Name, This, Remote) ->
+    {error, {inconsistent_cluster,
+             rabbit_misc:format("~s version mismatch: local node is ~s, "
+                                "remote node ~s", [Name, This, Remote])}}.
+
+-spec check_otp_consistency
+        (string()) -> rabbit_types:ok_or_error(any()).
+
+%% Require the remote node's OTP release string to exactly match ours.
+check_otp_consistency(Remote) ->
+    check_version_consistency(rabbit_misc:otp_release(), Remote, "OTP").
diff --git a/deps/rabbit/src/rabbit_vhost.erl b/deps/rabbit/src/rabbit_vhost.erl
new file mode 100644
index 0000000000..c8c5fc961a
--- /dev/null
+++ b/deps/rabbit/src/rabbit_vhost.erl
@@ -0,0 +1,422 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_vhost).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("vhost.hrl").
+
+-export([recover/0, recover/1]).
+-export([add/2, add/4, delete/2, exists/1, with/2, with_user_and_vhost/3, assert/1, update/2,
+ set_limits/2, vhost_cluster_state/1, is_running_on_all_nodes/1, await_running_on_all_nodes/2,
+ list/0, count/0, list_names/0, all/0, parse_tags/1]).
+-export([info/1, info/2, info_all/0, info_all/1, info_all/2, info_all/3]).
+-export([dir/1, msg_store_dir_path/1, msg_store_dir_wildcard/0]).
+-export([delete_storage/1]).
+-export([vhost_down/1]).
+-export([put_vhost/5]).
+
+%%
+%% API
+%%
+
+%% Node-level recovery: clean up state left by a previous incarnation,
+%% prime binding recovery, then start the vhost supervision tree and
+%% initialise every known vhost.
+recover() ->
+    %% Clear out remnants of old incarnation, in case we restarted
+    %% faster than other nodes handled DOWN messages from us.
+    rabbit_amqqueue:on_node_down(node()),
+
+    rabbit_amqqueue:warn_file_limit(),
+
+    %% Prepare rabbit_semi_durable_route table
+    rabbit_binding:recover(),
+
+    %% rabbit_vhost_sup_sup will start the actual recovery.
+    %% So recovery will be run every time a vhost supervisor is restarted.
+    ok = rabbit_vhost_sup_sup:start(),
+
+    [ok = rabbit_vhost_sup_sup:init_vhost(VHost) || VHost <- list_names()],
+    ok.
+
+%% Per-vhost recovery: ensure the message-store directory and its
+%% .vhost stub file exist, recover the vhost's queues, exchanges and
+%% bindings, start the recovered queues and their mirrors.
+recover(VHost) ->
+    VHostDir = msg_store_dir_path(VHost),
+    rabbit_log:info("Making sure data directory '~ts' for vhost '~s' exists~n",
+                    [VHostDir, VHost]),
+    VHostStubFile = filename:join(VHostDir, ".vhost"),
+    ok = rabbit_file:ensure_dir(VHostStubFile),
+    ok = file:write_file(VHostStubFile, VHost),
+    {Recovered, Failed} = rabbit_amqqueue:recover(VHost),
+    AllQs = Recovered ++ Failed,
+    QNames = [amqqueue:get_name(Q) || Q <- AllQs],
+    ok = rabbit_binding:recover(rabbit_exchange:recover(VHost), QNames),
+    ok = rabbit_amqqueue:start(Recovered),
+    %% Start queue mirrors.
+    ok = rabbit_mirror_queue_misc:on_vhost_up(VHost),
+    ok.
+
+-define(INFO_KEYS, vhost:info_keys()).
+
+-spec parse_tags(binary() | string() | atom()) -> [atom()].
+%% Split a comma-separated tag value into a list of trimmed atom tags.
+%% undefined and empty values (list or binary) yield [].
+parse_tags(undefined) ->
+    [];
+parse_tags("") ->
+    [];
+parse_tags(<<>>) ->
+    [];
+parse_tags(Val) when is_binary(Val) ->
+    parse_tags(rabbit_data_coercion:to_list(Val));
+parse_tags(Val) when is_list(Val) ->
+    lists:map(fun trim_tag/1, re:split(Val, ",", [{return, list}])).
+
+-spec add(vhost:name(), rabbit_types:username()) -> rabbit_types:ok_or_error(any()).
+
+%% Create a vhost with no description or tags; a no-op if it already
+%% exists.
+add(VHost, ActingUser) ->
+    case exists(VHost) of
+        true  -> ok;
+        false -> do_add(VHost, <<"">>, [], ActingUser)
+    end.
+
+-spec add(vhost:name(), binary(), [atom()], rabbit_types:username()) -> rabbit_types:ok_or_error(any()).
+
+%% Create a vhost with the given description and tags; a no-op if it
+%% already exists.
+add(Name, Description, Tags, ActingUser) ->
+    case exists(Name) of
+        true  -> ok;
+        false -> do_add(Name, Description, Tags, ActingUser)
+    end.
+
+%% Insert the vhost record in a mnesia transaction, declare its seven
+%% default exchanges, start its supervision tree on all nodes, and emit
+%% the vhost_created event.  The second fun passed to
+%% execute_mnesia_transaction/2 returns the record unchanged when its
+%% flag argument is true and declares the default exchanges when it is
+%% false.
+do_add(Name, Description, Tags, ActingUser) ->
+    case Description of
+        undefined ->
+            rabbit_log:info("Adding vhost '~s' without a description", [Name]);
+        Value ->
+            rabbit_log:info("Adding vhost '~s' (description: '~s')", [Name, Value])
+    end,
+    VHost = rabbit_misc:execute_mnesia_transaction(
+          fun () ->
+                  case mnesia:wread({rabbit_vhost, Name}) of
+                      [] ->
+                          Row = vhost:new(Name, [], #{description => Description, tags => Tags}),
+                          rabbit_log:debug("Inserting a virtual host record ~p", [Row]),
+                          ok = mnesia:write(rabbit_vhost, Row, write),
+                          Row;
+                      %% the vhost already exists
+                      [Row] ->
+                          Row
+                  end
+          end,
+          fun (VHost1, true) ->
+                  VHost1;
+              (VHost1, false) ->
+                  [begin
+                    Resource = rabbit_misc:r(Name, exchange, ExchangeName),
+                    rabbit_log:debug("Will declare an exchange ~p", [Resource]),
+                    _ = rabbit_exchange:declare(Resource, Type, true, false, Internal, [], ActingUser)
+                   end || {ExchangeName, Type, Internal} <-
+                          [{<<"">>,                   direct,  false},
+                           {<<"amq.direct">>,         direct,  false},
+                           {<<"amq.topic">>,          topic,   false},
+                           %% per 0-9-1 pdf
+                           {<<"amq.match">>,          headers, false},
+                           %% per 0-9-1 xml
+                           {<<"amq.headers">>,        headers, false},
+                           {<<"amq.fanout">>,         fanout,  false},
+                           {<<"amq.rabbitmq.trace">>, topic,   true}]],
+                  VHost1
+          end),
+    case rabbit_vhost_sup_sup:start_on_all_nodes(Name) of
+        ok ->
+            rabbit_event:notify(vhost_created, info(VHost)
+                                ++ [{user_who_performed_action, ActingUser},
+                                    {description, Description},
+                                    {tags, Tags}]),
+            ok;
+        {error, Reason} ->
+            Msg = rabbit_misc:format("failed to set up vhost '~s': ~p",
+                                     [Name, Reason]),
+            {error, Msg}
+    end.
+
+-spec delete(vhost:name(), rabbit_types:username()) -> rabbit_types:ok_or_error(any()).
+
+%% Delete a vhost: its queues and exchanges first (outside the
+%% transaction, see FIXME below), then the vhost record plus runtime
+%% parameters and policies, the vhost_deleted event, and finally the
+%% vhost supervision trees on all nodes.
+delete(VHost, ActingUser) ->
+    %% FIXME: We are forced to delete the queues and exchanges outside
+    %% the TX below. Queue deletion involves sending messages to the queue
+    %% process, which in turn results in further mnesia actions and
+    %% eventually the termination of that process. Exchange deletion causes
+    %% notifications which must be sent outside the TX
+    rabbit_log:info("Deleting vhost '~s'~n", [VHost]),
+    QDelFun = fun (Q) -> rabbit_amqqueue:delete(Q, false, false, ActingUser) end,
+    [begin
+         Name = amqqueue:get_name(Q),
+         assert_benign(rabbit_amqqueue:with(Name, QDelFun), ActingUser)
+     end || Q <- rabbit_amqqueue:list(VHost)],
+    [assert_benign(rabbit_exchange:delete(Name, false, ActingUser), ActingUser) ||
+        #exchange{name = Name} <- rabbit_exchange:list(VHost)],
+    Funs = rabbit_misc:execute_mnesia_transaction(
+          with(VHost, fun () -> internal_delete(VHost, ActingUser) end)),
+    ok = rabbit_event:notify(vhost_deleted, [{name, VHost},
+                                             {user_who_performed_action, ActingUser}]),
+    %% Run the deferred parameter/policy clean-up thunks returned by
+    %% internal_delete/2; a concurrent vhost deletion is tolerated.
+    [case Fun() of
+         ok                                  -> ok;
+         {error, {no_such_vhost, VHost}} -> ok
+     end || Fun <- Funs],
+    %% After vhost was deleted from mnesia DB, we try to stop vhost supervisors
+    %% on all the nodes.
+    rabbit_vhost_sup_sup:delete_on_all_nodes(VHost),
+    ok.
+
+%% Idempotent HTTP-style upsert of a vhost: normalises "no tags"
+%% spellings, creates the vhost if absent (waiting up to 45s for it to
+%% come up cluster-wide, then granting the creator full permissions),
+%% and toggles tracing as requested.
+put_vhost(Name, Description, Tags0, Trace, Username) ->
+    Tags = case Tags0 of
+      undefined   -> <<"">>;
+      null        -> <<"">>;
+      "undefined" -> <<"">>;
+      "null"      -> <<"">>;
+      Other       -> Other
+    end,
+    Result = case exists(Name) of
+                 true  -> ok;
+                 false -> add(Name, Description, parse_tags(Tags), Username),
+                          %% wait for up to 45 seconds for the vhost to initialise
+                          %% on all nodes
+                          case await_running_on_all_nodes(Name, 45000) of
+                              ok               ->
+                                  maybe_grant_full_permissions(Name, Username);
+                              {error, timeout} ->
+                                  {error, timeout}
+                          end
+             end,
+    case Trace of
+        true      -> rabbit_trace:start(Name);
+        false     -> rabbit_trace:stop(Name);
+        undefined -> ok
+    end,
+    Result.
+
+%% when definitions are loaded on boot, Username here will be ?INTERNAL_USER,
+%% which does not actually exist
+maybe_grant_full_permissions(_Name, ?INTERNAL_USER) ->
+    ok;
+maybe_grant_full_permissions(Name, Username) ->
+    U = rabbit_auth_backend_internal:lookup_user(Username),
+    maybe_grant_full_permissions(U, Name, Username).
+
+%% Grant ".*"/".*"/".*" on the vhost only when the user really exists.
+maybe_grant_full_permissions({ok, _}, Name, Username) ->
+    rabbit_auth_backend_internal:set_permissions(
+      Username, Name, <<".*">>, <<".*">>, <<".*">>, Username);
+maybe_grant_full_permissions(_, _Name, _Username) ->
+    ok.
+
+
+%% 50 ms
+-define(AWAIT_SAMPLE_INTERVAL, 50).
+
+-spec await_running_on_all_nodes(vhost:name(), integer()) -> ok | {error, timeout}.
+%% Poll is_running_on_all_nodes/1 every 50ms until it succeeds or
+%% Timeout (ms) worth of attempts is exhausted.
+await_running_on_all_nodes(VHost, Timeout) ->
+    Attempts = round(Timeout / ?AWAIT_SAMPLE_INTERVAL),
+    await_running_on_all_nodes0(VHost, Attempts).
+
+await_running_on_all_nodes0(_VHost, 0) ->
+    {error, timeout};
+await_running_on_all_nodes0(VHost, Attempts) ->
+    case is_running_on_all_nodes(VHost) of
+        true  -> ok;
+        _     ->
+            timer:sleep(?AWAIT_SAMPLE_INTERVAL),
+            await_running_on_all_nodes0(VHost, Attempts - 1)
+    end.
+
+-spec is_running_on_all_nodes(vhost:name()) -> boolean().
+%% True iff every running cluster node reports this vhost as running
+%% (see vhost_cluster_state/1 for the per-node states).
+is_running_on_all_nodes(VHost) ->
+    NodeStates = vhost_cluster_state(VHost),
+    lists:all(fun ({_Node, running}) -> true;
+                  ({_Node, _Other})  -> false
+              end, NodeStates).
+
+-spec vhost_cluster_state(vhost:name()) -> [{atom(), atom()}].
+%% Ask every running cluster node whether this vhost's supervision
+%% tree is alive; a node that cannot be reached reports 'nodedown'.
+vhost_cluster_state(VHost) ->
+    Nodes = rabbit_nodes:all_running(),
+    lists:map(fun(Node) ->
+                      State = case rabbit_misc:rpc_call(Node,
+                                                        rabbit_vhost_sup_sup, is_vhost_alive,
+                                                        [VHost]) of
+                                  {badrpc, nodedown} -> nodedown;
+                                  true               -> running;
+                                  false              -> stopped
+                              end,
+                      {Node, State}
+              end,
+              Nodes).
+
+%% Emit the vhost_down event for this node (supervision tree stopped).
+vhost_down(VHost) ->
+    ok = rabbit_event:notify(vhost_down,
+                             [{name, VHost},
+                              {node, node()},
+                              {user_who_performed_action, ?INTERNAL_USER}]).
+
+%% Remove a deleted vhost's message-store directory from disk.  A
+%% missing directory (concurrent delete) is tolerated; other failures
+%% are logged and returned.
+delete_storage(VHost) ->
+    VhostDir = msg_store_dir_path(VHost),
+    rabbit_log:info("Deleting message store directory for vhost '~s' at '~s'~n", [VHost, VhostDir]),
+    %% Message store should be closed when vhost supervisor is closed.
+    case rabbit_file:recursive_delete([VhostDir]) of
+        ok                   -> ok;
+        {error, {_, enoent}} ->
+            %% a concurrent delete did the job for us
+            rabbit_log:warning("Tried to delete storage directories for vhost '~s', it failed with an ENOENT", [VHost]),
+            ok;
+        Other                ->
+            rabbit_log:warning("Tried to delete storage directories for vhost '~s': ~p", [VHost, Other]),
+            Other
+    end.
+
+%% Treat the harmless outcomes of queue/exchange deletion as success;
+%% an 'absent' queue (home node down) has its mnesia entries removed.
+assert_benign(ok, _)                 -> ok;
+assert_benign({ok, _}, _)            -> ok;
+assert_benign({ok, _, _}, _)         -> ok;
+assert_benign({error, not_found}, _) -> ok;
+assert_benign({error, {absent, Q, _}}, ActingUser) ->
+    %% Removing the mnesia entries here is safe. If/when the down node
+    %% restarts, it will clear out the on-disk storage of the queue.
+    QName = amqqueue:get_name(Q),
+    rabbit_amqqueue:internal_delete(QName, ActingUser).
+
+%% Transactional part of vhost deletion: clear all user and topic
+%% permissions, collect deferred clean-up thunks for runtime parameters
+%% and policies, and delete the vhost record.  The returned thunks must
+%% be run by the caller outside the transaction.
+internal_delete(VHost, ActingUser) ->
+    [ok = rabbit_auth_backend_internal:clear_permissions(
+            proplists:get_value(user, Info), VHost, ActingUser)
+     || Info <- rabbit_auth_backend_internal:list_vhost_permissions(VHost)],
+    TopicPermissions = rabbit_auth_backend_internal:list_vhost_topic_permissions(VHost),
+    [ok = rabbit_auth_backend_internal:clear_topic_permissions(
+            proplists:get_value(user, TopicPermission), VHost, ActingUser)
+     || TopicPermission <- TopicPermissions],
+    Fs1 = [rabbit_runtime_parameters:clear(VHost,
+                                           proplists:get_value(component, Info),
+                                           proplists:get_value(name, Info),
+                                           ActingUser)
+     || Info <- rabbit_runtime_parameters:list(VHost)],
+    Fs2 = [rabbit_policy:delete(VHost, proplists:get_value(name, Info), ActingUser)
+           || Info <- rabbit_policy:list(VHost)],
+    ok = mnesia:delete({rabbit_vhost, VHost}),
+    Fs1 ++ Fs2.
+
+-spec exists(vhost:name()) -> boolean().
+
+%% Dirty-read membership test against the rabbit_vhost table.
+exists(VHost) ->
+    mnesia:dirty_read({rabbit_vhost, VHost}) /= [].
+
+-spec list_names() -> [vhost:name()].
+list_names() -> mnesia:dirty_all_keys(rabbit_vhost).
+
+%% Exists for backwards compatibility, prefer list_names/0.
+-spec list() -> [vhost:name()].
+list() -> list_names().
+
+%% All vhost records (not just names).
+-spec all() -> [vhost:vhost()].
+all() -> mnesia:dirty_match_object(rabbit_vhost, vhost:pattern_match_all()).
+
+-spec count() -> non_neg_integer().
+count() ->
+    length(list()).
+
+-spec with(vhost:name(), rabbit_misc:thunk(A)) -> A.
+
+%% Wrap Thunk so that, inside an mnesia transaction, it only runs when
+%% the vhost exists; otherwise the transaction aborts with
+%% {no_such_vhost, VHost}.
+with(VHost, Thunk) ->
+    fun () ->
+            case mnesia:read({rabbit_vhost, VHost}) of
+                [] ->
+                    mnesia:abort({no_such_vhost, VHost});
+                [_V] ->
+                    Thunk()
+            end
+    end.
+
+-spec with_user_and_vhost
+        (rabbit_types:username(), vhost:name(), rabbit_misc:thunk(A)) -> A.
+
+%% As with/2, additionally requiring that the user exists.
+with_user_and_vhost(Username, VHost, Thunk) ->
+    rabbit_misc:with_user(Username, with(VHost, Thunk)).
+
+%% Like with/2 but outside an Mnesia tx
+
+-spec assert(vhost:name()) -> 'ok'.
+
+assert(VHost) -> case exists(VHost) of
+                     true  -> ok;
+                     false -> throw({error, {no_such_vhost, VHost}})
+                 end.
+
+-spec update(vhost:name(), fun((vhost:vhost()) -> vhost:vhost())) -> vhost:vhost().
+
+%% Read-modify-write a vhost record; must run inside an mnesia
+%% transaction, which aborts if the vhost does not exist.
+update(VHost, Fun) ->
+    case mnesia:read({rabbit_vhost, VHost}) of
+        [] ->
+            mnesia:abort({no_such_vhost, VHost});
+        [V] ->
+            V1 = Fun(V),
+            ok = mnesia:write(rabbit_vhost, V1, write),
+            V1
+    end.
+
+%% Store Limits on the vhost record; clearing (undefined) resets the
+%% limits to the empty list.
+set_limits(VHost, undefined) ->
+    set_limits(VHost, []);
+set_limits(VHost, Limits) ->
+    vhost:set_limits(VHost, Limits).
+
+
+%% Stable on-disk directory name for a vhost: the 128-bit MD5 of its
+%% name rendered in base 36.
+dir(Vhost) ->
+    <<Num:128>> = erlang:md5(Vhost),
+    rabbit_misc:format("~.36B", [Num]).
+
+%% Absolute message-store directory path for one vhost.
+msg_store_dir_path(VHost) ->
+    EncodedName = dir(VHost),
+    rabbit_data_coercion:to_list(filename:join([msg_store_dir_base(), EncodedName])).
+
+%% Glob matching every vhost's message-store directory.
+msg_store_dir_wildcard() ->
+    rabbit_data_coercion:to_list(filename:join([msg_store_dir_base(), "*"])).
+
+msg_store_dir_base() ->
+    Dir = rabbit_mnesia:dir(),
+    filename:join([Dir, "msg_stores", "vhosts"]).
+
+-spec trim_tag(list() | binary() | atom()) -> atom().
+%% Whitespace-trim a single tag token and coerce it to an atom.
+trim_tag(Val) ->
+    rabbit_data_coercion:to_atom(string:trim(rabbit_data_coercion:to_list(Val))).
+
+%%----------------------------------------------------------------------------
+
+%% Build an info proplist for the requested item keys.
+infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items].
+
+%% Compute a single info item; unknown keys are logged and rejected.
+i(name,    VHost) -> vhost:get_name(VHost);
+i(tracing, VHost) -> rabbit_trace:enabled(vhost:get_name(VHost));
+i(cluster_state, VHost) -> vhost_cluster_state(vhost:get_name(VHost));
+i(description, VHost) -> vhost:get_description(VHost);
+i(tags, VHost) -> vhost:get_tags(VHost);
+i(metadata, VHost) -> vhost:get_metadata(VHost);
+i(Item, VHost) ->
+    rabbit_log:error("Don't know how to compute a virtual host info item '~s' for virtual host '~p'", [Item, VHost]),
+    throw({bad_argument, Item}).
+
+-spec info(vhost:vhost() | vhost:name()) -> rabbit_types:infos().
+
+%% Accepts either a vhost record or a vhost name; an unknown name
+%% yields the empty list.
+info(VHost) when ?is_vhost(VHost) ->
+    infos(?INFO_KEYS, VHost);
+info(Key) ->
+    case mnesia:dirty_read({rabbit_vhost, Key}) of
+        []      -> [];
+        [VHost] -> infos(?INFO_KEYS, VHost)
+    end.
+
+-spec info(vhost:vhost(), rabbit_types:info_keys()) -> rabbit_types:infos().
+info(VHost, Items) -> infos(Items, VHost).
+
+-spec info_all() -> [rabbit_types:infos()].
+info_all() -> info_all(?INFO_KEYS).
+
+-spec info_all(rabbit_types:info_keys()) -> [rabbit_types:infos()].
+info_all(Items) -> [info(VHost, Items) || VHost <- all()].
+
+info_all(Ref, AggregatorPid) -> info_all(?INFO_KEYS, Ref, AggregatorPid).
+
+-spec info_all(rabbit_types:info_keys(), reference(), pid()) -> 'ok'.
+%% Streaming variant: emit each vhost's info to the aggregator process.
+info_all(Items, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map(
+      AggregatorPid, Ref, fun(VHost) -> info(VHost, Items) end, all()).
diff --git a/deps/rabbit/src/rabbit_vhost_limit.erl b/deps/rabbit/src/rabbit_vhost_limit.erl
new file mode 100644
index 0000000000..bee01f3054
--- /dev/null
+++ b/deps/rabbit/src/rabbit_vhost_limit.erl
@@ -0,0 +1,205 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_vhost_limit).
+
+-behaviour(rabbit_runtime_parameter).
+
+-include("rabbit.hrl").
+
+-export([register/0]).
+-export([parse_set/3, set/3, clear/2]).
+-export([list/0, list/1]).
+-export([update_limit/4, clear_limit/3, get_limit/2]).
+-export([validate/5, notify/5, notify_clear/4]).
+-export([connection_limit/1, queue_limit/1,
+ is_over_queue_limit/1, would_exceed_queue_limit/2,
+ is_over_connection_limit/1]).
+
+-import(rabbit_misc, [pget/2, pget/3]).
+
+-rabbit_boot_step({?MODULE,
+ [{description, "vhost limit parameters"},
+ {mfa, {rabbit_vhost_limit, register, []}},
+ {requires, rabbit_registry},
+ {enables, recovery}]}).
+
+%%----------------------------------------------------------------------------
+
+register() ->
+ rabbit_registry:register(runtime_parameter, <<"vhost-limits">>, ?MODULE).
+
+validate(_VHost, <<"vhost-limits">>, Name, Term, _User) ->
+ rabbit_parameter_validation:proplist(
+ Name, vhost_limit_validation(), Term).
+
+notify(VHost, <<"vhost-limits">>, <<"limits">>, Limits, ActingUser) ->
+ rabbit_event:notify(vhost_limits_set, [{name, <<"limits">>},
+ {user_who_performed_action, ActingUser}
+ | Limits]),
+ update_vhost(VHost, Limits).
+
+notify_clear(VHost, <<"vhost-limits">>, <<"limits">>, ActingUser) ->
+ rabbit_event:notify(vhost_limits_cleared, [{name, <<"limits">>},
+ {user_who_performed_action, ActingUser}]),
+ %% If the function is called as a part of vhost deletion, the vhost can
+ %% be already deleted.
+ case rabbit_vhost:exists(VHost) of
+ true -> update_vhost(VHost, undefined);
+ false -> ok
+ end.
+
+connection_limit(VirtualHost) ->
+ get_limit(VirtualHost, <<"max-connections">>).
+
+queue_limit(VirtualHost) ->
+ get_limit(VirtualHost, <<"max-queues">>).
+
+
+%% Reads the <<"limits">> runtime parameter(s) for the given vhost
+%% (or all vhosts when '_' is passed) and returns {VHostName, Limits}
+%% pairs, skipping entries with an undefined value.
+query_limits(VHost) ->
+ case rabbit_runtime_parameters:list(VHost, <<"vhost-limits">>) of
+ [] -> [];
+ Params -> [ {pget(vhost, Param), pget(value, Param)}
+ || Param <- Params,
+ pget(value, Param) =/= undefined,
+ pget(name, Param) == <<"limits">> ]
+ end.
+
+
+-spec list() -> [{vhost:name(), rabbit_types:infos()}].
+%% Lists limits for every vhost ('_' matches all vhosts).
+list() ->
+ query_limits('_').
+
+-spec list(vhost:name()) -> rabbit_types:infos().
+%% Lists the limits of a single vhost; returns [] when none are set.
+list(VHost) ->
+ case query_limits(VHost) of
+ [] -> [];
+ [{VHost, Value}] -> Value
+ end.
+
+-spec is_over_connection_limit(vhost:name()) -> {true, non_neg_integer()} | false.
+
+%% Returns {true, Limit} when the vhost has reached (or exceeded) its
+%% configured max-connections limit, false otherwise. The tracked
+%% connection count is only consulted when a positive limit is set.
+is_over_connection_limit(VirtualHost) ->
+ case rabbit_vhost_limit:connection_limit(VirtualHost) of
+ %% no limit configured
+ undefined -> false;
+ %% with limit = 0, no connections are allowed
+ {ok, 0} -> {true, 0};
+ {ok, Limit} when is_integer(Limit) andalso Limit > 0 ->
+ ConnectionCount =
+ rabbit_connection_tracking:count_tracked_items_in({vhost, VirtualHost}),
+ case ConnectionCount >= Limit of
+ false -> false;
+ true -> {true, Limit}
+ end;
+ %% any negative value means "no limit". Note that parameter validation
+ %% will replace negative integers with 'undefined', so this is to be
+ %% explicit and extra defensive
+ {ok, Limit} when is_integer(Limit) andalso Limit < 0 -> false;
+ %% ignore non-integer limits
+ {ok, _Limit} -> false
+ end.
+
+-spec would_exceed_queue_limit(non_neg_integer(), vhost:name()) ->
+ {true, non_neg_integer(), non_neg_integer()} | false.
+
+%% Checks whether declaring AdditionalCount more queues would push the
+%% vhost over its max-queues limit. Returns {true, Limit, CurrentCount}
+%% when it would, false otherwise.
+would_exceed_queue_limit(AdditionalCount, VirtualHost) ->
+ case queue_limit(VirtualHost) of
+ undefined ->
+ %% no limit configured
+ false;
+ {ok, 0} ->
+ %% with limit = 0, no queues can be declared (perhaps not very
+ %% useful but consistent with the connection limit)
+ {true, 0, 0};
+ {ok, Limit} when is_integer(Limit) andalso Limit > 0 ->
+ QueueCount = rabbit_amqqueue:count(VirtualHost),
+ case (AdditionalCount + QueueCount) > Limit of
+ false -> false;
+ true -> {true, Limit, QueueCount}
+ end;
+ {ok, Limit} when is_integer(Limit) andalso Limit < 0 ->
+ %% any negative value means "no limit". Note that parameter validation
+ %% will replace negative integers with 'undefined', so this is to be
+ %% explicit and extra defensive
+ false;
+ {ok, _Limit} ->
+ %% ignore non-integer limits
+ false
+ end.
+
+-spec is_over_queue_limit(vhost:name()) -> {true, non_neg_integer()} | false.
+
+%% Convenience wrapper: true when declaring one more queue would exceed
+%% the vhost's max-queues limit.
+is_over_queue_limit(VirtualHost) ->
+ case would_exceed_queue_limit(1, VirtualHost) of
+ {true, Limit, _QueueCount} -> {true, Limit};
+ false -> false
+ end.
+
+%%----------------------------------------------------------------------------
+
+%% Parses a JSON limits definition and stores it as the vhost's
+%% <<"limits">> runtime parameter. Returns {error_string, Msg} on a
+%% JSON decoding failure.
+parse_set(VHost, Defn, ActingUser) ->
+ Definition = rabbit_data_coercion:to_binary(Defn),
+ case rabbit_json:try_decode(Definition) of
+ {ok, Term} ->
+ set(VHost, maps:to_list(Term), ActingUser);
+ {error, Reason} ->
+ {error_string,
+ rabbit_misc:format("JSON decoding error. Reason: ~ts", [Reason])}
+ end.
+
+%% Stores an already-parsed limits proplist for the vhost.
+set(VHost, Defn, ActingUser) ->
+ rabbit_runtime_parameters:set_any(VHost, <<"vhost-limits">>,
+ <<"limits">>, Defn, ActingUser).
+
+%% Removes all limits for the vhost.
+clear(VHost, ActingUser) ->
+ rabbit_runtime_parameters:clear_any(VHost, <<"vhost-limits">>,
+ <<"limits">>, ActingUser).
+
+%% Sets (or replaces) a single named limit, preserving any other limits
+%% already configured for the vhost.
+update_limit(VHost, Name, Value, ActingUser) ->
+ OldDef = case rabbit_runtime_parameters:list(VHost, <<"vhost-limits">>) of
+ [] -> [];
+ [Param] -> pget(value, Param, [])
+ end,
+ NewDef = [{Name, Value} | lists:keydelete(Name, 1, OldDef)],
+ set(VHost, NewDef, ActingUser).
+
+%% Removes a single named limit, preserving any other limits already
+%% configured for the vhost.
+clear_limit(VHost, Name, ActingUser) ->
+ OldDef = case rabbit_runtime_parameters:list(VHost, <<"vhost-limits">>) of
+ [] -> [];
+ [Param] -> pget(value, Param, [])
+ end,
+ NewDef = lists:keydelete(Name, 1, OldDef),
+ set(VHost, NewDef, ActingUser).
+
+vhost_limit_validation() ->
+ [{<<"max-connections">>, fun rabbit_parameter_validation:integer/2, optional},
+ {<<"max-queues">>, fun rabbit_parameter_validation:integer/2, optional}].
+
+update_vhost(VHostName, Limits) ->
+ rabbit_misc:execute_mnesia_transaction(
+ fun() ->
+ rabbit_vhost:update(VHostName,
+ fun(VHost) ->
+ rabbit_vhost:set_limits(VHost, Limits)
+ end)
+ end),
+ ok.
+
+%% Looks up a single named limit for a vhost. Returns {ok, N} for a
+%% non-negative configured value and 'undefined' when the limit is
+%% absent or negative (negative means "no limit").
+get_limit(VirtualHost, Limit) ->
+ case rabbit_runtime_parameters:list(VirtualHost, <<"vhost-limits">>) of
+ [] -> undefined;
+ [Param] -> case pget(value, Param) of
+ undefined -> undefined;
+ Val -> case pget(Limit, Val) of
+ undefined -> undefined;
+ %% no limit
+ N when N < 0 -> undefined;
+ N when N >= 0 -> {ok, N}
+ end
+ end
+ end.
diff --git a/deps/rabbit/src/rabbit_vhost_msg_store.erl b/deps/rabbit/src/rabbit_vhost_msg_store.erl
new file mode 100644
index 0000000000..8667b4d143
--- /dev/null
+++ b/deps/rabbit/src/rabbit_vhost_msg_store.erl
@@ -0,0 +1,68 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_vhost_msg_store).
+
+-include("rabbit.hrl").
+
+-export([start/4, stop/2, client_init/5, successfully_recovered_state/2]).
+-export([vhost_store_pid/2]).
+
+%% Starts a message store of the given Type (e.g. transient/persistent)
+%% as a child of the vhost's supervision tree. ClientRefs is either a
+%% list of client refs to recover or 'undefined' for a clean start.
+start(VHost, Type, ClientRefs, StartupFunState) when is_list(ClientRefs);
+ ClientRefs == undefined ->
+ case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+ {ok, VHostSup} ->
+ VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+ supervisor2:start_child(VHostSup,
+ {Type, {rabbit_msg_store, start_link,
+ [Type, VHostDir, ClientRefs, StartupFunState]},
+ transient, ?MSG_STORE_WORKER_WAIT, worker, [rabbit_msg_store]});
+ %% we can get here if a vhost is added and removed concurrently
+ %% e.g. some integration tests do it
+ {error, {no_such_vhost, VHost}} = E ->
+ rabbit_log:error("Failed to start a message store for vhost ~s: vhost no longer exists!",
+ [VHost]),
+ E
+ end.
+
+%% Stops and removes the message store child of the given Type from the
+%% vhost's supervision tree. Tolerates the vhost having been deleted
+%% concurrently (see start/4).
+stop(VHost, Type) ->
+ case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+ {ok, VHostSup} ->
+ ok = supervisor2:terminate_child(VHostSup, Type),
+ ok = supervisor2:delete_child(VHostSup, Type);
+ %% see start/4
+ {error, {no_such_vhost, VHost}} ->
+ rabbit_log:error("Failed to stop a message store for vhost ~s: vhost no longer exists!",
+ [VHost]),
+
+ ok
+ end.
+
+%% Initialises a message store client against the store process of the
+%% given Type in this vhost.
+client_init(VHost, Type, Ref, MsgOnDiskFun, CloseFDsFun) ->
+ with_vhost_store(VHost, Type, fun(StorePid) ->
+ rabbit_msg_store:client_init(StorePid, Ref, MsgOnDiskFun, CloseFDsFun)
+ end).
+
+%% Runs Fun with the pid of the vhost's message store of the given Type;
+%% throws if the store has not been started.
+with_vhost_store(VHost, Type, Fun) ->
+ case vhost_store_pid(VHost, Type) of
+ no_pid ->
+ throw({message_store_not_started, Type, VHost});
+ Pid when is_pid(Pid) ->
+ Fun(Pid)
+ end.
+
+%% Resolves the pid of the vhost's message store of the given Type, or
+%% no_pid when it is not running.
+vhost_store_pid(VHost, Type) ->
+ {ok, VHostSup} = rabbit_vhost_sup_sup:get_vhost_sup(VHost),
+ case supervisor2:find_child(VHostSup, Type) of
+ [Pid] -> Pid;
+ [] -> no_pid
+ end.
+
+%% True when the store of the given Type recovered its on-disk state
+%% cleanly on startup.
+successfully_recovered_state(VHost, Type) ->
+ with_vhost_store(VHost, Type, fun(StorePid) ->
+ rabbit_msg_store:successfully_recovered_state(StorePid)
+ end).
diff --git a/deps/rabbit/src/rabbit_vhost_process.erl b/deps/rabbit/src/rabbit_vhost_process.erl
new file mode 100644
index 0000000000..cf70d49010
--- /dev/null
+++ b/deps/rabbit/src/rabbit_vhost_process.erl
@@ -0,0 +1,96 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2017-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% This module implements a vhost identity process.
+
+%% On start this process will try to recover the vhost data and
+%% processes structure (queues and message stores).
+%% If recovered successfully, the process will save its PID
+%% to vhost process registry. If vhost process PID is in the registry and the
+%% process is alive - the vhost is considered running.
+
+%% On termination, the process will notify of the vhost going down.
+
+%% The process will also check periodically if the vhost still
+%% present in mnesia DB and stop the vhost supervision tree when it
+%% disappears.
+
+-module(rabbit_vhost_process).
+
+%% Transitional step until we can require Erlang/OTP 21 and
+%% use the now recommended try/catch syntax for obtaining the stack trace.
+-compile(nowarn_deprecated_function).
+
+-include("rabbit.hrl").
+
+-define(TICKTIME_RATIO, 4).
+
+-behaviour(gen_server2).
+-export([start_link/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+start_link(VHost) ->
+ gen_server2:start_link(?MODULE, [VHost], []).
+
+
+%% Recovers the vhost's data, registers this process as the vhost
+%% identity process and schedules periodic liveness checks. On recovery
+%% failure, marks the vhost's local durable queues as stopped and stops.
+init([VHost]) ->
+ process_flag(trap_exit, true),
+ rabbit_log:debug("Recovering data for VHost ~p~n", [VHost]),
+ try
+ %% Recover the vhost data and save it to vhost registry.
+ ok = rabbit_vhost:recover(VHost),
+ rabbit_vhost_sup_sup:save_vhost_process(VHost, self()),
+ Interval = interval(),
+ timer:send_interval(Interval, check_vhost),
+ %% Recovery can leave a lot of garbage behind; reclaim it eagerly.
+ true = erlang:garbage_collect(),
+ {ok, VHost}
+ catch _:Reason:Stacktrace ->
+ rabbit_amqqueue:mark_local_durable_queues_stopped(VHost),
+ rabbit_log:error("Unable to recover vhost ~p data. Reason ~p~n"
+ " Stacktrace ~p",
+ [VHost, Reason, Stacktrace]),
+ {stop, Reason}
+ end.
+
+%% No synchronous API; any call is acknowledged with 'ok'.
+handle_call(_,_,VHost) ->
+ {reply, ok, VHost}.
+
+%% No asynchronous API; casts are ignored.
+handle_cast(_, VHost) ->
+ {noreply, VHost}.
+
+%% Periodic liveness check (scheduled in init/1): if the vhost has been
+%% removed from the database, tear down its supervision tree.
+handle_info(check_vhost, VHost) ->
+ case rabbit_vhost:exists(VHost) of
+ true -> {noreply, VHost};
+ false ->
+ rabbit_log:warning("Virtual host '~s' is gone. "
+ "Stopping its top level supervisor.",
+ [VHost]),
+ %% Stop vhost's top supervisor in a one-off process to avoid a deadlock:
+ %% us (a child process) waiting for supervisor shutdown and our supervisor(s)
+ %% waiting for us to shutdown.
+ spawn(
+ fun() ->
+ rabbit_vhost_sup_sup:stop_and_delete_vhost(VHost)
+ end),
+ {noreply, VHost}
+ end;
+handle_info(_, VHost) ->
+ {noreply, VHost}.
+
+terminate(shutdown, VHost) ->
+ %% Notify that vhost is stopped.
+ rabbit_vhost:vhost_down(VHost);
+terminate(_, _VHost) ->
+ ok.
+
+code_change(_OldVsn, VHost, _Extra) ->
+ {ok, VHost}.
+
+%% Liveness check interval, derived from the kernel net tick time.
+interval() ->
+ application:get_env(kernel, net_ticktime, 60000) * ?TICKTIME_RATIO.
diff --git a/deps/rabbit/src/rabbit_vhost_sup.erl b/deps/rabbit/src/rabbit_vhost_sup.erl
new file mode 100644
index 0000000000..d82d827ecf
--- /dev/null
+++ b/deps/rabbit/src/rabbit_vhost_sup.erl
@@ -0,0 +1,22 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2017-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_vhost_sup).
+
+-include("rabbit.hrl").
+
+%% Each vhost gets an instance of this supervisor that supervises
+%% message stores and queues (via rabbit_amqqueue_sup_sup).
+-behaviour(supervisor2).
+-export([init/1]).
+-export([start_link/1]).
+
+start_link(VHost) ->
+ supervisor2:start_link(?MODULE, [VHost]).
+
+init([_VHost]) ->
+ {ok, {{one_for_all, 0, 1}, []}}.
diff --git a/deps/rabbit/src/rabbit_vhost_sup_sup.erl b/deps/rabbit/src/rabbit_vhost_sup_sup.erl
new file mode 100644
index 0000000000..c201237daa
--- /dev/null
+++ b/deps/rabbit/src/rabbit_vhost_sup_sup.erl
@@ -0,0 +1,271 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_vhost_sup_sup).
+
+-include("rabbit.hrl").
+
+-behaviour(supervisor2).
+
+-export([init/1]).
+
+-export([start_link/0, start/0]).
+-export([init_vhost/1,
+ start_vhost/1, start_vhost/2,
+ get_vhost_sup/1, get_vhost_sup/2,
+ save_vhost_sup/3,
+ save_vhost_process/2]).
+-export([delete_on_all_nodes/1, start_on_all_nodes/1]).
+-export([is_vhost_alive/1]).
+-export([check/0]).
+
+%% Internal
+-export([stop_and_delete_vhost/1]).
+
+-record(vhost_sup, {vhost, vhost_sup_pid, wrapper_pid, vhost_process_pid}).
+
+%% Attaches this supervisor to the top-level rabbit_sup tree.
+start() ->
+ case supervisor:start_child(rabbit_sup, {?MODULE,
+ {?MODULE, start_link, []},
+ permanent, infinity, supervisor,
+ [?MODULE]}) of
+ {ok, _} -> ok;
+ {error, Err} -> {error, Err}
+ end.
+
+start_link() ->
+ supervisor2:start_link({local, ?MODULE}, ?MODULE, []).
+
+%% simple_one_for_one: one rabbit_vhost_sup_wrapper child per vhost.
+%% Also owns the ETS table that maps vhost names to their pids.
+init([]) ->
+ %% This assumes that a single vhost termination should not shut down nodes
+ %% unless the operator opts in.
+ RestartStrategy = vhost_restart_strategy(),
+ ets:new(?MODULE, [named_table, public, {keypos, #vhost_sup.vhost}]),
+ {ok, {{simple_one_for_one, 0, 5},
+ [{rabbit_vhost, {rabbit_vhost_sup_wrapper, start_link, []},
+ RestartStrategy, ?SUPERVISOR_WAIT, supervisor,
+ [rabbit_vhost_sup_wrapper, rabbit_vhost_sup]}]}}.
+
+%% Starts the vhost on this node and on every fully booted peer node.
+%% Already-started vhosts are not treated as failures.
+start_on_all_nodes(VHost) ->
+ %% Do not try to start a vhost on booting peer nodes
+ AllBooted = [Node || Node <- rabbit_nodes:all_running(), rabbit:is_booted(Node)],
+ Nodes = [node() | AllBooted],
+ Results = [{Node, start_vhost(VHost, Node)} || Node <- Nodes],
+ Failures = lists:filter(fun
+ ({_, {ok, _}}) -> false;
+ ({_, {error, {already_started, _}}}) -> false;
+ (_) -> true
+ end,
+ Results),
+ case Failures of
+ [] -> ok;
+ Errors -> {error, {failed_to_start_vhost_on_nodes, Errors}}
+ end.
+
+%% Stops and deletes the vhost's supervision tree and storage on every
+%% running node. Per-node failures are ignored (best effort).
+delete_on_all_nodes(VHost) ->
+ [ stop_and_delete_vhost(VHost, Node) || Node <- rabbit_nodes:all_running() ],
+ ok.
+
+%% Stops the local vhost supervision tree (if running) and deletes the
+%% vhost's on-disk storage. Storage is deleted even when no supervisor
+%% was found, so this is safe to call for partially started vhosts.
+stop_and_delete_vhost(VHost) ->
+ StopResult = case lookup_vhost_sup_record(VHost) of
+ not_found -> ok;
+ #vhost_sup{wrapper_pid = WrapperPid,
+ vhost_sup_pid = VHostSupPid} ->
+ case is_process_alive(WrapperPid) of
+ false -> ok;
+ true ->
+ rabbit_log:info("Stopping vhost supervisor ~p"
+ " for vhost '~s'~n",
+ [VHostSupPid, VHost]),
+ case supervisor2:terminate_child(?MODULE, WrapperPid) of
+ ok ->
+ true = ets:delete(?MODULE, VHost),
+ ok;
+ Other ->
+ Other
+ end
+ end
+ end,
+ ok = rabbit_vhost:delete_storage(VHost),
+ StopResult.
+
+%% We take an optimistic approach when stopping a remote VHost supervisor.
+stop_and_delete_vhost(VHost, Node) when Node == node(self()) ->
+ stop_and_delete_vhost(VHost);
+stop_and_delete_vhost(VHost, Node) ->
+ case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, stop_and_delete_vhost, [VHost]) of
+ ok -> ok;
+ {badrpc, RpcErr} ->
+ rabbit_log:error("Failed to stop and delete a vhost ~p"
+ " on node ~p."
+ " Reason: ~p",
+ [VHost, Node, RpcErr]),
+ {error, RpcErr}
+ end.
+
+-spec init_vhost(rabbit_types:vhost()) -> ok | {error, {no_such_vhost, rabbit_types:vhost()}}.
+init_vhost(VHost) ->
+ case start_vhost(VHost) of
+ {ok, _} -> ok;
+ {error, {already_started, _}} ->
+ rabbit_log:warning(
+ "Attempting to start an already started vhost '~s'.",
+ [VHost]),
+ ok;
+ {error, {no_such_vhost, VHost}} ->
+ {error, {no_such_vhost, VHost}};
+ {error, Reason} ->
+ case vhost_restart_strategy() of
+ permanent ->
+ rabbit_log:error(
+ "Unable to initialize vhost data store for vhost '~s'."
+ " Reason: ~p",
+ [VHost, Reason]),
+ throw({error, Reason});
+ transient ->
+ rabbit_log:warning(
+ "Unable to initialize vhost data store for vhost '~s'."
+ " The vhost will be stopped for this node. "
+ " Reason: ~p",
+ [VHost, Reason]),
+ ok
+ end
+ end.
+
+-type vhost_error() :: {no_such_vhost, rabbit_types:vhost()} |
+ {vhost_supervisor_not_running, rabbit_types:vhost()}.
+
+-spec get_vhost_sup(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, vhost_error() | term()}.
+get_vhost_sup(VHost, Node) ->
+ case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, get_vhost_sup, [VHost]) of
+ {ok, Pid} when is_pid(Pid) ->
+ {ok, Pid};
+ {error, Err} ->
+ {error, Err};
+ {badrpc, RpcErr} ->
+ {error, RpcErr}
+ end.
+
+-spec get_vhost_sup(rabbit_types:vhost()) -> {ok, pid()} | {error, vhost_error()}.
+get_vhost_sup(VHost) ->
+ case rabbit_vhost:exists(VHost) of
+ false ->
+ {error, {no_such_vhost, VHost}};
+ true ->
+ case vhost_sup_pid(VHost) of
+ no_pid ->
+ {error, {vhost_supervisor_not_running, VHost}};
+ {ok, Pid} when is_pid(Pid) ->
+ {ok, Pid}
+ end
+ end.
+
+-spec start_vhost(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, term()}.
+start_vhost(VHost, Node) ->
+ case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, start_vhost, [VHost]) of
+ {ok, Pid} -> {ok, Pid};
+ {error, Err} -> {error, Err};
+ {badrpc, RpcErr} -> {error, RpcErr}
+ end.
+
+-spec start_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}.
+start_vhost(VHost) ->
+ case rabbit_vhost:exists(VHost) of
+ false -> {error, {no_such_vhost, VHost}};
+ true ->
+ case whereis(?MODULE) of
+ Pid when is_pid(Pid) ->
+ supervisor2:start_child(?MODULE, [VHost]);
+ undefined ->
+ {error, rabbit_vhost_sup_sup_not_running}
+ end
+ end.
+
+-spec is_vhost_alive(rabbit_types:vhost()) -> boolean().
+is_vhost_alive(VHost) ->
+%% A vhost is considered alive if its supervision tree is alive and
+%% saved in the ETS table: the wrapper, the vhost supervisor and the
+%% vhost identity process must all be registered and running.
+ case lookup_vhost_sup_record(VHost) of
+ #vhost_sup{wrapper_pid = WrapperPid,
+ vhost_sup_pid = VHostSupPid,
+ vhost_process_pid = VHostProcessPid}
+ when is_pid(WrapperPid),
+ is_pid(VHostSupPid),
+ is_pid(VHostProcessPid) ->
+ is_process_alive(WrapperPid)
+ andalso
+ is_process_alive(VHostSupPid)
+ andalso
+ is_process_alive(VHostProcessPid);
+ _ -> false
+ end.
+
+
+-spec save_vhost_sup(rabbit_types:vhost(), pid(), pid()) -> ok.
+save_vhost_sup(VHost, WrapperPid, VHostPid) ->
+ true = ets:insert(?MODULE, #vhost_sup{vhost = VHost,
+ vhost_sup_pid = VHostPid,
+ wrapper_pid = WrapperPid}),
+ ok.
+
+-spec save_vhost_process(rabbit_types:vhost(), pid()) -> ok.
+save_vhost_process(VHost, VHostProcessPid) ->
+ true = ets:update_element(?MODULE, VHost,
+ {#vhost_sup.vhost_process_pid, VHostProcessPid}),
+ ok.
+
+-spec lookup_vhost_sup_record(rabbit_types:vhost()) -> #vhost_sup{} | not_found.
+%% Fetches the vhost's supervision record from the ETS registry;
+%% tolerates the table not existing yet (e.g. before init/1 has run).
+lookup_vhost_sup_record(VHost) ->
+ case ets:info(?MODULE, name) of
+ ?MODULE ->
+ case ets:lookup(?MODULE, VHost) of
+ [] -> not_found;
+ [#vhost_sup{} = VHostSup] -> VHostSup
+ end;
+ undefined -> not_found
+ end.
+
+-spec vhost_sup_pid(rabbit_types:vhost()) -> no_pid | {ok, pid()}.
+%% Resolves the vhost supervisor pid, pruning stale registry entries
+%% whose process has died.
+vhost_sup_pid(VHost) ->
+ case lookup_vhost_sup_record(VHost) of
+ not_found ->
+ no_pid;
+ #vhost_sup{vhost_sup_pid = Pid} = VHostSup ->
+ case erlang:is_process_alive(Pid) of
+ true -> {ok, Pid};
+ false ->
+ ets:delete_object(?MODULE, VHostSup),
+ no_pid
+ end
+ end.
+
+%% Maps the operator-facing vhost_restart_strategy setting onto a
+%% supervisor child restart type.
+vhost_restart_strategy() ->
+ %% This assumes that a single vhost termination should not shut down nodes
+ %% unless the operator opts in.
+ case application:get_env(rabbit, vhost_restart_strategy, continue) of
+ continue -> transient;
+ stop_node -> permanent;
+ transient -> transient;
+ permanent -> permanent
+ end.
+
+%% Health check: returns the names of vhosts that are not fully alive —
+%% either their supervision tree is down or one of their message store
+%% processes is dead.
+check() ->
+ VHosts = rabbit_vhost:list_names(),
+ lists:filter(
+ fun(V) ->
+ case rabbit_vhost_sup_sup:get_vhost_sup(V) of
+ {ok, Sup} ->
+ MsgStores = [Pid || {Name, Pid, _, _} <- supervisor:which_children(Sup),
+ lists:member(Name, [msg_store_persistent,
+ msg_store_transient])],
+ not is_vhost_alive(V) orelse (not lists:all(fun(P) ->
+ erlang:is_process_alive(P)
+ end, MsgStores));
+ {error, _} ->
+ true
+ end
+ end, VHosts).
diff --git a/deps/rabbit/src/rabbit_vhost_sup_wrapper.erl b/deps/rabbit/src/rabbit_vhost_sup_wrapper.erl
new file mode 100644
index 0000000000..ed239ade69
--- /dev/null
+++ b/deps/rabbit/src/rabbit_vhost_sup_wrapper.erl
@@ -0,0 +1,57 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2017-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% This module is a wrapper around vhost supervisor to
+%% provide exactly once restart semantics.
+
+-module(rabbit_vhost_sup_wrapper).
+
+-include("rabbit.hrl").
+
+-behaviour(supervisor2).
+-export([init/1]).
+-export([start_link/1]).
+-export([start_vhost_sup/1]).
+
+%% Starts the wrapper unless a supervision tree for this vhost is
+%% already registered, in which case {error, {already_started, Pid}}
+%% is returned to the simple_one_for_one parent.
+start_link(VHost) ->
+ %% Using supervisor, because supervisor2 does not stop a started child when
+ %% another one fails to start. Bug?
+ case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+ {ok, Pid} ->
+ {error, {already_started, Pid}};
+ {error, _} ->
+ supervisor:start_link(?MODULE, [VHost])
+ end.
+
+init([VHost]) ->
+ %% 2 restarts in 5 minutes. One per message store.
+ {ok, {{one_for_all, 2, 300},
+ [
+ %% rabbit_vhost_sup is an empty supervisor container for
+ %% all data processes.
+ {rabbit_vhost_sup,
+ {rabbit_vhost_sup_wrapper, start_vhost_sup, [VHost]},
+ permanent, infinity, supervisor,
+ [rabbit_vhost_sup]},
+ %% rabbit_vhost_process is a vhost identity process, which
+ %% is responsible for data recovery and vhost aliveness status.
+ %% See the module comments for more info.
+ {rabbit_vhost_process,
+ {rabbit_vhost_process, start_link, [VHost]},
+ permanent, ?WORKER_WAIT, worker,
+ [rabbit_vhost_process]}]}}.
+
+
+%% Child start callback: starts the per-vhost supervisor and records the
+%% wrapper pid (self) and supervisor pid in the vhost registry.
+start_vhost_sup(VHost) ->
+ case rabbit_vhost_sup:start_link(VHost) of
+ {ok, Pid} ->
+ %% Save vhost sup record with wrapper pid and vhost sup pid.
+ ok = rabbit_vhost_sup_sup:save_vhost_sup(VHost, self(), Pid),
+ {ok, Pid};
+ Other ->
+ Other
+ end.
diff --git a/deps/rabbit/src/rabbit_vm.erl b/deps/rabbit/src/rabbit_vm.erl
new file mode 100644
index 0000000000..b014e090c5
--- /dev/null
+++ b/deps/rabbit/src/rabbit_vm.erl
@@ -0,0 +1,427 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_vm).
+
+-export([memory/0, binary/0, ets_tables_memory/1]).
+
+-define(MAGIC_PLUGINS, ["cowboy", "ranch", "sockjs"]).
+
+%%----------------------------------------------------------------------------
+
+-spec memory() -> rabbit_types:infos().
+
+memory() ->
+ All = interesting_sups(),
+ {Sums, _Other} = sum_processes(
+ lists:append(All), distinguishers(), [memory]),
+
+ [Qs, QsSlave, Qqs, Ssqs, Srqs, SCoor, ConnsReader, ConnsWriter, ConnsChannel,
+ ConnsOther, MsgIndexProc, MgmtDbProc, Plugins] =
+ [aggregate(Names, Sums, memory, fun (X) -> X end)
+ || Names <- distinguished_interesting_sups()],
+
+ MnesiaETS = mnesia_memory(),
+ MsgIndexETS = ets_memory(msg_stores()),
+ MetricsETS = ets_memory([rabbit_metrics]),
+ QuorumETS = ets_memory([ra_log_ets]),
+ MetricsProc = try
+ [{_, M}] = process_info(whereis(rabbit_metrics), [memory]),
+ M
+ catch
+ error:badarg ->
+ 0
+ end,
+ MgmtDbETS = ets_memory([rabbit_mgmt_storage]),
+ [{total, ErlangTotal},
+ {processes, Processes},
+ {ets, ETS},
+ {atom, Atom},
+ {binary, Bin},
+ {code, Code},
+ {system, System}] =
+ erlang:memory([total, processes, ets, atom, binary, code, system]),
+
+ Strategy = vm_memory_monitor:get_memory_calculation_strategy(),
+ Allocated = recon_alloc:memory(allocated),
+ Rss = vm_memory_monitor:get_rss_memory(),
+
+ AllocatedUnused = max(Allocated - ErlangTotal, 0),
+ OSReserved = max(Rss - Allocated, 0),
+
+ OtherProc = Processes
+ - ConnsReader - ConnsWriter - ConnsChannel - ConnsOther
+ - Qs - QsSlave - Qqs - Ssqs - Srqs - SCoor - MsgIndexProc - Plugins
+ - MgmtDbProc - MetricsProc,
+
+ [
+ %% Connections
+ {connection_readers, ConnsReader},
+ {connection_writers, ConnsWriter},
+ {connection_channels, ConnsChannel},
+ {connection_other, ConnsOther},
+
+ %% Queues
+ {queue_procs, Qs},
+ {queue_slave_procs, QsSlave},
+ {quorum_queue_procs, Qqs},
+ {stream_queue_procs, Ssqs},
+ {stream_queue_replica_reader_procs, Srqs},
+ {stream_queue_coordinator_procs, SCoor},
+
+ %% Processes
+ {plugins, Plugins},
+ {other_proc, lists:max([0, OtherProc])}, %% [1]
+
+ %% Metrics
+ {metrics, MetricsETS + MetricsProc},
+ {mgmt_db, MgmtDbETS + MgmtDbProc},
+
+ %% ETS
+ {mnesia, MnesiaETS},
+ {quorum_ets, QuorumETS},
+ {other_ets, ETS - MnesiaETS - MetricsETS - MgmtDbETS - MsgIndexETS - QuorumETS},
+
+ %% Messages (mostly, some binaries are not messages)
+ {binary, Bin},
+ {msg_index, MsgIndexETS + MsgIndexProc},
+
+ %% System
+ {code, Code},
+ {atom, Atom},
+ {other_system, System - ETS - Bin - Code - Atom},
+ {allocated_unused, AllocatedUnused},
+ {reserved_unallocated, OSReserved},
+ {strategy, Strategy},
+ {total, [{erlang, ErlangTotal},
+ {rss, Rss},
+ {allocated, Allocated}]}
+ ].
+%% [1] - erlang:memory(processes) can be less than the sum of its
+%% parts. Rather than display something nonsensical, just silence any
+%% claims about negative memory. See
+%% http://erlang.org/pipermail/erlang-questions/2012-September/069320.html
+
+-spec binary() -> rabbit_types:infos().
+
+binary() ->
+ All = interesting_sups(),
+ {Sums, Rest} =
+ sum_processes(
+ lists:append(All),
+ fun (binary, Info, Acc) ->
+ lists:foldl(fun ({Ptr, Sz, _RefCnt}, Acc0) ->
+ sets:add_element({Ptr, Sz}, Acc0)
+ end, Acc, Info)
+ end, distinguishers(), [{binary, sets:new()}]),
+ [Other, Qs, QsSlave, Qqs, Ssqs, Srqs, Scoor, ConnsReader, ConnsWriter,
+ ConnsChannel, ConnsOther, MsgIndexProc, MgmtDbProc, Plugins] =
+ [aggregate(Names, [{other, Rest} | Sums], binary, fun sum_binary/1)
+ || Names <- [[other] | distinguished_interesting_sups()]],
+ [{connection_readers, ConnsReader},
+ {connection_writers, ConnsWriter},
+ {connection_channels, ConnsChannel},
+ {connection_other, ConnsOther},
+ {queue_procs, Qs},
+ {queue_slave_procs, QsSlave},
+ {quorum_queue_procs, Qqs},
+ {stream_queue_procs, Ssqs},
+ {stream_queue_replica_reader_procs, Srqs},
+ {stream_queue_coordinator_procs, Scoor},
+ {plugins, Plugins},
+ {mgmt_db, MgmtDbProc},
+ {msg_index, MsgIndexProc},
+ {other, Other}].
+
+%%----------------------------------------------------------------------------
+
+mnesia_memory() ->
+ case mnesia:system_info(is_running) of
+ yes -> lists:sum([bytes(mnesia:table_info(Tab, memory)) ||
+ Tab <- mnesia:system_info(tables)]);
+ _ -> 0
+ end.
+
+%% Total memory (bytes) of all ETS tables owned by the given processes.
+ets_memory(Owners) ->
+ lists:sum([V || {_K, V} <- ets_tables_memory(Owners)]).
+
+-spec ets_tables_memory(Owners) -> rabbit_types:infos()
+ when Owners :: all | OwnerProcessName | [OwnerProcessName],
+ OwnerProcessName :: atom().
+
+%% Per-table memory usage in bytes, keyed by table name, restricted to
+%% tables owned by the given (named or pid) processes, or all named
+%% tables when 'all' is passed.
+ets_tables_memory(all) ->
+ [{ets:info(T, name), bytes(ets:info(T, memory))}
+ || T <- ets:all(),
+ is_atom(T)];
+ets_tables_memory(OwnerName) when is_atom(OwnerName) ->
+ ets_tables_memory([OwnerName]);
+ets_tables_memory(Owners) when is_list(Owners) ->
+ OwnerPids = lists:map(fun(O) when is_pid(O) -> O;
+ (O) when is_atom(O) -> whereis(O)
+ end,
+ Owners),
+ [{ets:info(T, name), bytes(ets:info(T, memory))}
+ || T <- ets:all(),
+ lists:member(ets:info(T, owner), OwnerPids)].
+
+%% Converts a word count (as reported by ets/mnesia) to bytes; returns 0
+%% when the input is not a number (e.g. 'undefined' for a dead table).
+bytes(Words) -> try
+ Words * erlang:system_info(wordsize)
+ catch
+ _:_ -> 0
+ end.
+
+interesting_sups() ->
+ [queue_sups(), quorum_sups(), stream_server_sups(), stream_reader_sups(),
+ conn_sups() | interesting_sups0()].
+
+queue_sups() ->
+ all_vhosts_children(rabbit_amqqueue_sup_sup).
+
+quorum_sups() ->
+ %% TODO: in the future not all ra servers may be queues and we needs
+ %% some way to filter this
+ case whereis(ra_server_sup_sup) of
+ undefined ->
+ [];
+ _ ->
+ [Pid || {_, Pid, _, _} <-
+ supervisor:which_children(ra_server_sup_sup)]
+ end.
+
+stream_server_sups() -> [osiris_server_sup].
+stream_reader_sups() -> [osiris_replica_reader_sup].
+
+msg_stores() ->
+ all_vhosts_children(msg_store_transient)
+ ++
+ all_vhosts_children(msg_store_persistent).
+
+%% Collects, across every running vhost, the child process registered
+%% under Name in that vhost's rabbit_vhost_sup tree (e.g. the queue
+%% supervisor or a message store). Vhosts missing the child are skipped.
+all_vhosts_children(Name) ->
+ case whereis(rabbit_vhost_sup_sup) of
+ undefined -> [];
+ Pid when is_pid(Pid) ->
+ lists:filtermap(
+ fun({_, VHostSupWrapper, _, _}) ->
+ case supervisor2:find_child(VHostSupWrapper,
+ rabbit_vhost_sup) of
+ [] -> false;
+ [VHostSup] ->
+ case supervisor2:find_child(VHostSup, Name) of
+ [QSup] -> {true, QSup};
+ [] -> false
+ end
+ end
+ end,
+ supervisor:which_children(rabbit_vhost_sup_sup))
+ end.
+
+interesting_sups0() ->
+ MsgIndexProcs = msg_stores(),
+ MgmtDbProcs = [rabbit_mgmt_sup_sup],
+ PluginProcs = plugin_sups(),
+ [MsgIndexProcs, MgmtDbProcs, PluginProcs].
+
+conn_sups() ->
+ Ranches = lists:flatten(ranch_server_sups()),
+ [amqp_sup|Ranches].
+
+ranch_server_sups() ->
+ try
+ ets:match(ranch_server, {{conns_sup, '_'}, '$1'})
+ catch
+ %% Ranch ETS table doesn't exist yet
+ error:badarg -> []
+ end.
+
+with(Sups, With) -> [{Sup, With} || Sup <- Sups].
+
+distinguishers() -> with(queue_sups(), fun queue_type/1) ++
+ with(conn_sups(), fun conn_type/1) ++
+ with(quorum_sups(), fun ra_type/1).
+
+distinguished_interesting_sups() ->
+ [
+ with(queue_sups(), master),
+ with(queue_sups(), slave),
+ with(quorum_sups(), quorum),
+ stream_server_sups(),
+ stream_reader_sups(),
+ with(quorum_sups(), stream),
+ with(conn_sups(), reader),
+ with(conn_sups(), writer),
+ with(conn_sups(), channel),
+ with(conn_sups(), other)]
+ ++ interesting_sups0().
+
+plugin_sups() ->
+ lists:append([plugin_sup(App) ||
+ {App, _, _} <- rabbit_misc:which_applications(),
+ is_plugin(atom_to_list(App))]).
+
+plugin_sup(App) ->
+ case application_controller:get_master(App) of
+ undefined -> [];
+ Master -> case application_master:get_child(Master) of
+ {Pid, _} when is_pid(Pid) -> [process_name(Pid)];
+ Pid when is_pid(Pid) -> [process_name(Pid)];
+ _ -> []
+ end
+ end.
+
+process_name(Pid) ->
+ case process_info(Pid, registered_name) of
+ {registered_name, Name} -> Name;
+ _ -> Pid
+ end.
+
+is_plugin("rabbitmq_" ++ _) -> true;
+is_plugin(App) -> lists:member(App, ?MAGIC_PLUGINS).
+
+aggregate(Names, Sums, Key, Fun) ->
+ lists:sum([extract(Name, Sums, Key, Fun) || Name <- Names]).
+
+extract(Name, Sums, Key, Fun) ->
+ case keyfind(Name, Sums) of
+ {value, Accs} -> Fun(keyfetch(Key, Accs));
+ false -> 0
+ end.
+
+sum_binary(Set) ->
+ sets:fold(fun({_Pt, Sz}, Acc) -> Acc + Sz end, 0, Set).
+
+queue_type(PDict) ->
+ case keyfind(process_name, PDict) of
+ {value, {rabbit_mirror_queue_slave, _}} -> slave;
+ _ -> master
+ end.
+
+%% Distinguisher for connection processes: reader / writer / channel
+%% according to the process_name dictionary entry, 'other' otherwise.
+conn_type(PDict) ->
+ case keyfind(process_name, PDict) of
+ {value, {rabbit_reader, _}} -> reader;
+ {value, {rabbit_writer, _}} -> writer;
+ {value, {rabbit_channel, _}} -> channel;
+ _ -> other
+ end.
+
+%% Distinguisher for Ra processes: the stream coordinator advertises
+%% itself via '$rabbit_vm_category'; everything else counts as quorum.
+ra_type(PDict) ->
+ case keyfind('$rabbit_vm_category', PDict) of
+ {value, rabbit_stream_coordinator} -> stream;
+ _ -> quorum
+ end.
+
+%%----------------------------------------------------------------------------
+
+%% NB: this code is non-rabbit specific.
+
+%% Types and specs for the generic process-summation API; the full
+%% algorithm description is on sum_processes/4 below.
+-type process() :: pid() | atom().
+-type info_key() :: atom().
+-type info_value() :: any().
+-type info_item() :: {info_key(), info_value()}.
+-type accumulate() :: fun ((info_key(), info_value(), info_value()) ->
+ info_value()).
+-type distinguisher() :: fun (([{term(), term()}]) -> atom()).
+-type distinguishers() :: [{info_key(), distinguisher()}].
+-spec sum_processes([process()], distinguishers(), [info_key()]) ->
+ {[{process(), [info_item()]}], [info_item()]}.
+-spec sum_processes([process()], accumulate(), distinguishers(),
+ [info_item()]) ->
+ {[{process(), [info_item()]}], [info_item()]}.
+
+%% Convenience form: sums every requested info item with '+', with an
+%% initial accumulator of 0 per item.
+sum_processes(Names, Distinguishers, Items) ->
+ sum_processes(Names, fun (_, X, Y) -> X + Y end, Distinguishers,
+ [{Item, 0} || Item <- Items]).
+
+%% summarize the process_info of all processes based on their
+%% '$ancestor' hierarchy, recorded in their process dictionary.
+%%
+%% The function takes
+%%
+%% 1) a list of names/pids of processes that are accumulation points
+%% in the hierarchy.
+%%
+%% 2) a function that aggregates individual info items -taking the
+%% info item key, value and accumulated value as the input and
+%% producing a new accumulated value.
+%%
+%% 3) a list of info item key / initial accumulator value pairs.
+%%
+%% The process_info of a process is accumulated at the nearest of its
+%% ancestors that is mentioned in the first argument, or, if no such
+%% ancestor exists or the ancestor information is absent, in a special
+%% 'other' bucket.
+%%
+%% The result is a pair consisting of
+%%
+%% 1) a k/v list, containing for each of the accumulation names/pids a
+%% list of info items, containing the accumulated data, and
+%%
+%% 2) the 'other' bucket - a list of info items containing the
+%% accumulated data of all processes with no matching ancestors
+%%
+%% Note that this function operates on names as well as pids, but
+%% these must match whatever is contained in the '$ancestor' process
+%% dictionary entry. Generally that means for all registered processes
+%% the name should be used.
+sum_processes(Names, Fun, Distinguishers, Acc0) ->
+ Items = [Item || {Item, _Blank0} <- Acc0],
+ {NameAccs, OtherAcc} =
+ lists:foldl(
+ fun (Pid, Acc) ->
+ %% always ask for registered_name and the dictionary in
+ %% addition to the requested Items: the former feeds
+ %% find_ancestor/3, the latter the distinguisher funs
+ InfoItems = [registered_name, dictionary | Items],
+ case process_info(Pid, InfoItems) of
+ undefined ->
+ %% process died between processes() and here; skip
+ Acc;
+ [{registered_name, RegName}, {dictionary, D} | Vals] ->
+ %% see docs for process_info/2 for the
+ %% special handling of 'registered_name'
+ %% info items
+ Extra = case RegName of
+ [] -> [];
+ N -> [N]
+ end,
+ Name0 = find_ancestor(Extra, D, Names),
+ %% apply the per-name distinguisher, if any, to
+ %% split the bucket further into {Name0, SubTag}
+ Name = case keyfind(Name0, Distinguishers) of
+ {value, DistFun} -> {Name0, DistFun(D)};
+ false -> Name0
+ end,
+ accumulate(
+ Name, Fun, orddict:from_list(Vals), Acc, Acc0)
+ end
+ end, {orddict:new(), Acc0}, processes()),
+ %% these conversions aren't strictly necessary; we do them simply
+ %% for the sake of encapsulating the representation.
+ {[{Name, orddict:to_list(Accs)} ||
+ {Name, Accs} <- orddict:to_list(NameAccs)],
+ orddict:to_list(OtherAcc)}.
+
+%% Walks Extra ++ '$ancestors' (from the process dictionary D) and
+%% returns the first entry that is a member of Names, or 'undefined'
+%% if none is — i.e. the nearest accumulation point for this process.
+find_ancestor(Extra, D, Names) ->
+ Ancestors = case keyfind('$ancestors', D) of
+ {value, Ancs} -> Ancs;
+ false -> []
+ end,
+ case lists:splitwith(fun (A) -> not lists:member(A, Names) end,
+ Extra ++ Ancestors) of
+ {_, []} -> undefined;
+ {_, [Name | _]} -> Name
+ end.
+
+%% Merges one process's info items (ValsDict) into the right bucket:
+%% 'undefined' goes into the shared OtherAcc; a named bucket is updated
+%% in place, or created from the blank accumulator Acc0 on first use.
+accumulate(undefined, Fun, ValsDict, {NameAccs, OtherAcc}, _Acc0) ->
+ {NameAccs, orddict:merge(Fun, ValsDict, OtherAcc)};
+accumulate(Name, Fun, ValsDict, {NameAccs, OtherAcc}, Acc0) ->
+ F = fun (NameAcc) -> orddict:merge(Fun, ValsDict, NameAcc) end,
+ {case orddict:is_key(Name, NameAccs) of
+ true -> orddict:update(Name, F, NameAccs);
+ false -> orddict:store( Name, F(Acc0), NameAccs)
+ end, OtherAcc}.
+
+%% Fetches the value for key K from k/v list L; the match asserts the
+%% key exists (badmatch crash otherwise).
+keyfetch(K, L) -> {value, {_, V}} = lists:keysearch(K, 1, L),
+ V.
+
+%% Like lists:keysearch/3 on position 1 but returns only the value:
+%% {value, V} when found, false otherwise.
+keyfind(K, L) -> case lists:keysearch(K, 1, L) of
+ {value, {_, V}} -> {value, V};
+ false -> false
+ end.
diff --git a/deps/rabbit/src/supervised_lifecycle.erl b/deps/rabbit/src/supervised_lifecycle.erl
new file mode 100644
index 0000000000..0e1bb9b5c8
--- /dev/null
+++ b/deps/rabbit/src/supervised_lifecycle.erl
@@ -0,0 +1,53 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Invoke callbacks on startup and termination.
+%%
+%% Simply hook this process into a supervision hierarchy, to have the
+%% callbacks invoked at a precise point during the establishment and
+%% teardown of that hierarchy, respectively.
+%%
+%% Or launch the process independently, and link to it, to have the
+%% callbacks invoked on startup and when the linked process
+%% terminates, respectively.
+
+-module(supervised_lifecycle).
+
+-behavior(gen_server).
+
+-export([start_link/3]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(atom(), rabbit_types:mfargs(), rabbit_types:mfargs()) ->
+ rabbit_types:ok_pid_or_error().
+
+%% Starts a locally registered process that applies StartMFA during
+%% init and StopMFA on termination.
+start_link(Name, StartMFA, StopMFA) ->
+ gen_server:start_link({local, Name}, ?MODULE, [StartMFA, StopMFA], []).
+
+%%----------------------------------------------------------------------------
+
+%% Runs the startup callback synchronously; the server's entire state
+%% is the stop MFA. trap_exit ensures terminate/2 (and thus the stop
+%% callback) runs when the supervisor shuts this process down.
+init([{M, F, A}, StopMFA]) ->
+ process_flag(trap_exit, true),
+ apply(M, F, A),
+ {ok, StopMFA}.
+
+%% No requests are expected; they are ignored (note: a gen_server:call
+%% to this process would time out since no reply is ever sent).
+handle_call(_Request, _From, State) -> {noreply, State}.
+
+handle_cast(_Msg, State) -> {noreply, State}.
+
+handle_info(_Info, State) -> {noreply, State}.
+
+%% Runs the shutdown callback regardless of the termination reason.
+terminate(_Reason, {M, F, A}) ->
+ apply(M, F, A),
+ ok.
+
+code_change(_OldVsn, State, _Extra) -> {ok, State}.
diff --git a/deps/rabbit/src/tcp_listener.erl b/deps/rabbit/src/tcp_listener.erl
new file mode 100644
index 0000000000..93c24ab397
--- /dev/null
+++ b/deps/rabbit/src/tcp_listener.erl
@@ -0,0 +1,90 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(tcp_listener).
+
+%% Represents a running TCP listener (a process that listens for inbound
+%% TCP or TLS connections). Every protocol supported typically has one
+%% or two listeners, plain TCP and (optionally) TLS, but there can
+%% be more, e.g. when multiple network interfaces are involved.
+%%
+%% A listener has 6 properties (is a tuple of 6):
+%%
+%% * IP address
+%% * Port
+%% * Node
+%% * Label (human-friendly name, e.g. AMQP 0-9-1)
+%% * Startup callback
+%% * Shutdown callback
+%%
+%% Listeners use Ranch in embedded mode to accept and "bridge" client
+%% connections with protocol entry points such as rabbit_reader.
+%%
+%% Listeners are tracked in a Mnesia table so that they can be
+%%
+%% * Shut down
+%% * Listed (e.g. in the management UI)
+%%
+%% Every tcp_listener process has callbacks that are executed on start
+%% and termination. Those must take care of listener registration
+%% among other things.
+%%
+%% Listeners are supervised by tcp_listener_sup (one supervisor per protocol).
+%%
+%% See also rabbit_networking and tcp_listener_sup.
+
+-behaviour(gen_server).
+
+-export([start_link/5]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+%% Startup/shutdown MFAs plus the listener coordinates, kept so
+%% terminate/2 can run the shutdown callback and log where it stopped.
+-record(state, {on_startup, on_shutdown, label, ip, port}).
+
+%%----------------------------------------------------------------------------
+
+-type mfargs() :: {atom(), atom(), [any()]}.
+
+-spec start_link
+ (inet:ip_address(), inet:port_number(),
+ mfargs(), mfargs(), string()) ->
+ rabbit_types:ok_pid_or_error().
+
+start_link(IPAddress, Port,
+ OnStartup, OnShutdown, Label) ->
+ gen_server:start_link(
+ ?MODULE, {IPAddress, Port,
+ OnStartup, OnShutdown, Label}, []).
+
+%%--------------------------------------------------------------------
+
+%% Logs the listener start and runs the startup callback with
+%% IPAddress and Port appended to its argument list. trap_exit
+%% guarantees terminate/2 (and so the shutdown callback) runs on
+%% supervisor shutdown.
+init({IPAddress, Port, {M,F,A} = OnStartup, OnShutdown, Label}) ->
+ process_flag(trap_exit, true),
+ error_logger:info_msg(
+ "started ~s on ~s:~p~n",
+ [Label, rabbit_misc:ntoab(IPAddress), Port]),
+ apply(M, F, A ++ [IPAddress, Port]),
+ {ok, #state{on_startup = OnStartup, on_shutdown = OnShutdown,
+ label = Label, ip=IPAddress, port=Port}}.
+
+%% No requests are expected; ignored (a caller would time out, since
+%% no reply is ever sent).
+handle_call(_Request, _From, State) ->
+ {noreply, State}.
+
+handle_cast(_Msg, State) ->
+ {noreply, State}.
+
+handle_info(_Info, State) ->
+ {noreply, State}.
+
+%% Logs the stop and runs the shutdown callback, again with
+%% IPAddress and Port appended to its argument list.
+terminate(_Reason, #state{on_shutdown = {M,F,A}, label=Label, ip=IPAddress, port=Port}) ->
+ error_logger:info_msg("stopped ~s on ~s:~p~n",
+ [Label, rabbit_misc:ntoab(IPAddress), Port]),
+ apply(M, F, A ++ [IPAddress, Port]).
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
diff --git a/deps/rabbit/src/tcp_listener_sup.erl b/deps/rabbit/src/tcp_listener_sup.erl
new file mode 100644
index 0000000000..82128bb2af
--- /dev/null
+++ b/deps/rabbit/src/tcp_listener_sup.erl
@@ -0,0 +1,54 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(tcp_listener_sup).
+
+%% Supervises TCP listeners. There is a separate supervisor for every
+%% protocol. In case of AMQP 0-9-1, it resides under rabbit_sup. Plugins
+%% that provide protocol support (e.g. STOMP) have an instance of this supervisor in their
+%% app supervision tree.
+%%
+%% See also rabbit_networking and tcp_listener.
+
+-behaviour(supervisor).
+
+-export([start_link/10]).
+-export([init/1]).
+
+-type mfargs() :: {atom(), atom(), [any()]}.
+
+-spec start_link
+ (inet:ip_address(), inet:port_number(), module(), [gen_tcp:listen_option()],
+ module(), any(), mfargs(), mfargs(), integer(), string()) ->
+ rabbit_types:ok_pid_or_error().
+
+start_link(IPAddress, Port, Transport, SocketOpts, ProtoSup, ProtoOpts, OnStartup, OnShutdown,
+ ConcurrentAcceptorCount, Label) ->
+ supervisor:start_link(
+ ?MODULE, {IPAddress, Port, Transport, SocketOpts, ProtoSup, ProtoOpts, OnStartup, OnShutdown,
+ ConcurrentAcceptorCount, Label}).
+
+%% Two children, restarted together (one_for_all): the embedded Ranch
+%% listener that accepts connections, and a tcp_listener worker whose
+%% start/stop callbacks handle listener registration.
+init({IPAddress, Port, Transport, SocketOpts, ProtoSup, ProtoOpts, OnStartup, OnShutdown,
+ ConcurrentAcceptorCount, Label}) ->
+ {ok, AckTimeout} = application:get_env(rabbit, ssl_handshake_timeout),
+ MaxConnections = rabbit_misc:get_env(rabbit, connection_max, infinity),
+ RanchListenerOpts = #{
+ num_acceptors => ConcurrentAcceptorCount,
+ max_connections => MaxConnections,
+ handshake_timeout => AckTimeout,
+ connection_type => supervisor,
+ socket_opts => [{ip, IPAddress},
+ {port, Port} |
+ SocketOpts]
+ },
+ Flags = {one_for_all, 10, 10},
+ OurChildSpecStart = {tcp_listener, start_link, [IPAddress, Port, OnStartup, OnShutdown, Label]},
+ %% 16#ffffffff: effectively-infinite shutdown timeout for the worker
+ OurChildSpec = {tcp_listener, OurChildSpecStart, transient, 16#ffffffff, worker, [tcp_listener]},
+ RanchChildSpec = ranch:child_spec(rabbit_networking:ranch_ref(IPAddress, Port),
+ Transport, RanchListenerOpts,
+ ProtoSup, ProtoOpts),
+ %% Ranch child first so the socket is listening before the
+ %% tcp_listener startup callback registers the listener.
+ {ok, {Flags, [RanchChildSpec, OurChildSpec]}}.
diff --git a/deps/rabbit/src/term_to_binary_compat.erl b/deps/rabbit/src/term_to_binary_compat.erl
new file mode 100644
index 0000000000..327a846d1f
--- /dev/null
+++ b/deps/rabbit/src/term_to_binary_compat.erl
@@ -0,0 +1,15 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2017-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(term_to_binary_compat).
+
+-include("rabbit.hrl").
+
+-export([term_to_binary_1/1]).
+
+%% Encodes Term with minor_version 1 of the external term format
+%% explicitly, so the encoding does not depend on the emulator's
+%% default minor version.
+term_to_binary_1(Term) ->
+ term_to_binary(Term, [{minor_version, 1}]).
diff --git a/deps/rabbit/src/vhost.erl b/deps/rabbit/src/vhost.erl
new file mode 100644
index 0000000000..ca704183a0
--- /dev/null
+++ b/deps/rabbit/src/vhost.erl
@@ -0,0 +1,172 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(vhost).
+
+%% Virtual host record accessor module. Dispatches between the v2
+%% record defined here (with a metadata map) and the legacy vhost_v1
+%% record, depending on the virtual_host_metadata feature flag.
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("vhost.hrl").
+
+-export([
+ new/2,
+ new/3,
+ fields/0,
+ fields/1,
+ info_keys/0,
+ record_version_to_use/0,
+ upgrade/1,
+ upgrade_to/2,
+ pattern_match_all/0,
+ get_name/1,
+ get_limits/1,
+ get_metadata/1,
+ get_description/1,
+ get_tags/1,
+ set_limits/2
+]).
+
+-define(record_version, vhost_v2).
+
+-type(name() :: binary()).
+
+-type(metadata_key() :: atom()).
+
+-type(metadata() :: #{description => binary(),
+ tags => [atom()],
+ metadata_key() => any()} | undefined).
+
+-type vhost() :: vhost_v1:vhost_v1() | vhost_v2().
+
+-record(vhost, {
+ %% name as a binary
+ virtual_host :: name() | '_',
+ %% proplist of limits configured, if any
+ limits :: list() | '_',
+ metadata :: metadata() | '_'
+}).
+
+-type vhost_v2() :: #vhost{
+ virtual_host :: name(),
+ limits :: list(),
+ metadata :: metadata()
+ }.
+
+-type vhost_pattern() :: vhost_v1:vhost_v1_pattern() |
+ vhost_v2_pattern().
+-type vhost_v2_pattern() :: #vhost{
+ virtual_host :: name() | '_',
+ limits :: '_',
+ metadata :: '_'
+ }.
+
+-export_type([name/0,
+ metadata_key/0,
+ metadata/0,
+ vhost/0,
+ vhost_v2/0,
+ vhost_pattern/0,
+ vhost_v2_pattern/0]).
+
+%% Creates a vhost record in the active record version; with a v2
+%% record the metadata field is left unset.
+-spec new(name(), list()) -> vhost().
+new(Name, Limits) ->
+ case record_version_to_use() of
+ ?record_version ->
+ #vhost{virtual_host = Name, limits = Limits};
+ _ ->
+ vhost_v1:new(Name, Limits)
+ end.
+
+%% As new/2 but with metadata; note that a v1 record silently drops
+%% the metadata argument (see vhost_v1:new/3).
+-spec new(name(), list(), map()) -> vhost().
+new(Name, Limits, Metadata) ->
+ case record_version_to_use() of
+ ?record_version ->
+ #vhost{virtual_host = Name, limits = Limits, metadata = Metadata};
+ _ ->
+ vhost_v1:new(Name, Limits)
+ end.
+
+-spec record_version_to_use() -> vhost_v1 | vhost_v2.
+
+%% v2 only once the virtual_host_metadata feature flag is enabled
+%% cluster-wide; otherwise fall back to the v1 record.
+record_version_to_use() ->
+ case rabbit_feature_flags:is_enabled(virtual_host_metadata) of
+ true -> ?record_version;
+ false -> vhost_v1:record_version_to_use()
+ end.
+
+-spec upgrade(vhost()) -> vhost().
+
+%% Upgrades a record to the active version; a record that already
+%% matches #vhost{} (v2) is returned unchanged.
+upgrade(#vhost{} = VHost) -> VHost;
+upgrade(OldVHost) -> upgrade_to(record_version_to_use(), OldVHost).
+
+-spec upgrade_to
+(vhost_v2, vhost()) -> vhost_v2();
+(vhost_v1, vhost_v1:vhost_v1()) -> vhost_v1:vhost_v1().
+
+upgrade_to(?record_version, #vhost{} = VHost) ->
+ VHost;
+upgrade_to(?record_version, OldVHost) ->
+ %% v1 -> v2: append an empty-description/no-tags metadata map as the
+ %% new trailing field, relying on the record's tuple representation
+ Fields = erlang:tuple_to_list(OldVHost) ++ [#{description => <<"">>, tags => []}],
+ #vhost{} = erlang:list_to_tuple(Fields);
+upgrade_to(Version, OldVHost) ->
+ vhost_v1:upgrade_to(Version, OldVHost).
+
+
+%% Field names of the active record version.
+fields() ->
+ case record_version_to_use() of
+ ?record_version -> fields(?record_version);
+ _ -> vhost_v1:fields()
+ end.
+
+fields(?record_version) -> record_info(fields, vhost);
+fields(Version) -> vhost_v1:fields(Version).
+
+info_keys() ->
+ case record_version_to_use() of
+ %% note: this reports description and tags separately even though
+ %% they are stored in the metadata map. MK.
+ ?record_version -> [name, description, tags, metadata, tracing, cluster_state];
+ _ -> vhost_v1:info_keys()
+ end.
+
+-spec pattern_match_all() -> vhost_pattern().
+
+%% Match-all pattern for the active record version (for mnesia match
+%% operations).
+pattern_match_all() ->
+ case record_version_to_use() of
+ ?record_version -> #vhost{_ = '_'};
+ _ -> vhost_v1:pattern_match_all()
+ end.
+
+%% The accessors below match on the v2 record and delegate anything
+%% else to vhost_v1.
+-spec get_name(vhost()) -> name().
+get_name(#vhost{virtual_host = Value}) -> Value;
+get_name(VHost) -> vhost_v1:get_name(VHost).
+
+-spec get_limits(vhost()) -> list().
+get_limits(#vhost{limits = Value}) -> Value;
+get_limits(VHost) -> vhost_v1:get_limits(VHost).
+
+-spec get_metadata(vhost()) -> metadata().
+get_metadata(#vhost{metadata = Value}) -> Value;
+get_metadata(VHost) -> vhost_v1:get_metadata(VHost).
+
+%% NOTE(review): maps:get/3 on the metadata requires it to be a map; a
+%% v2 record whose metadata is still 'undefined' would crash here.
+-spec get_description(vhost()) -> binary().
+get_description(#vhost{} = VHost) ->
+ maps:get(description, get_metadata(VHost), undefined);
+get_description(VHost) ->
+ vhost_v1:get_description(VHost).
+
+-spec get_tags(vhost()) -> [atom()].
+get_tags(#vhost{} = VHost) ->
+ maps:get(tags, get_metadata(VHost), undefined);
+get_tags(VHost) ->
+ vhost_v1:get_tags(VHost).
+
+%% Replaces the limits proplist of the given vhost record.
+set_limits(VHost, Value) ->
+ case record_version_to_use() of
+ ?record_version ->
+ VHost#vhost{limits = Value};
+ _ ->
+ vhost_v1:set_limits(VHost, Value)
+ end.
diff --git a/deps/rabbit/src/vhost_v1.erl b/deps/rabbit/src/vhost_v1.erl
new file mode 100644
index 0000000000..5b53eb148a
--- /dev/null
+++ b/deps/rabbit/src/vhost_v1.erl
@@ -0,0 +1,106 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(vhost_v1).
+
+%% Legacy (pre-metadata) virtual host record; see the vhost module for
+%% the dispatching accessors.
+
+-include("vhost.hrl").
+
+-export([new/2,
+ new/3,
+ upgrade/1,
+ upgrade_to/2,
+ fields/0,
+ fields/1,
+ info_keys/0,
+ field_name/0,
+ record_version_to_use/0,
+ pattern_match_all/0,
+ get_name/1,
+ get_limits/1,
+ get_metadata/1,
+ get_description/1,
+ get_tags/1,
+ set_limits/2
+]).
+
+-define(record_version, ?MODULE).
+
+%% Represents a vhost.
+%%
+%% Historically this record had 2 arguments although the 2nd
+%% was never used (`dummy`, always undefined). This is because
+%% single field records were/are illegal in OTP.
+%%
+%% As of 3.6.x, the second argument is vhost limits,
+%% which is actually used and has the same default.
+%% Nonetheless, this required a migration, see rabbit_upgrade_functions.
+
+-record(vhost, {
+ %% name as a binary
+ virtual_host :: vhost:name() | '_',
+ %% proplist of limits configured, if any
+ limits :: list() | '_'}).
+
+-type vhost() :: vhost_v1().
+-type vhost_v1() :: #vhost{
+ virtual_host :: vhost:name(),
+ limits :: list()
+ }.
+
+-export_type([vhost/0,
+ vhost_v1/0,
+ vhost_pattern/0,
+ vhost_v1_pattern/0]).
+
+
+-spec new(vhost:name(), list()) -> vhost().
+new(Name, Limits) ->
+ #vhost{virtual_host = Name, limits = Limits}.
+
+%% v1 has no metadata field, so the Metadata argument is ignored.
+-spec new(vhost:name(), list(), map()) -> vhost().
+new(Name, Limits, _Metadata) ->
+ #vhost{virtual_host = Name, limits = Limits}.
+
+
+-spec record_version_to_use() -> vhost_v1.
+record_version_to_use() ->
+ ?record_version.
+
+%% v1 is the oldest version; nothing to upgrade from.
+-spec upgrade(vhost()) -> vhost().
+upgrade(#vhost{} = VHost) -> VHost.
+
+-spec upgrade_to(vhost_v1, vhost()) -> vhost().
+upgrade_to(?record_version, #vhost{} = VHost) ->
+ VHost.
+
+fields() -> fields(?record_version).
+
+fields(?record_version) -> record_info(fields, vhost).
+
+%% Position of the record key field (for mnesia table definitions).
+field_name() -> #vhost.virtual_host.
+
+info_keys() -> [name, tracing, cluster_state].
+
+-type vhost_pattern() :: vhost_v1_pattern().
+-type vhost_v1_pattern() :: #vhost{
+ virtual_host :: vhost:name() | '_',
+ limits :: '_'
+ }.
+
+-spec pattern_match_all() -> vhost_pattern().
+
+pattern_match_all() -> #vhost{_ = '_'}.
+
+get_name(#vhost{virtual_host = Value}) -> Value.
+get_limits(#vhost{limits = Value}) -> Value.
+
+%% Metadata, description and tags do not exist in v1.
+get_metadata(_VHost) -> undefined.
+get_description(_VHost) -> undefined.
+get_tags(_VHost) -> undefined.
+
+set_limits(VHost, Value) ->
+ VHost#vhost{limits = Value}.