summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLoïc Hoguin <lhoguin@vmware.com>2022-08-04 12:30:38 +0200
committerLoïc Hoguin <lhoguin@vmware.com>2022-08-04 13:50:12 +0200
commit744e66e42ae9fd1d8cecf3700773968d925b98bd (patch)
tree7d2f9c279881e3ef79bff1a36f401676fa38e593
parent4faec42412d499cde370e6ebd680858eeeda7452 (diff)
downloadrabbitmq-server-git-744e66e42ae9fd1d8cecf3700773968d925b98bd.tar.gz
CQv1: Fix failure to recover messages in rare cases
When a full recovery was done it was possible to lose messages for v1 queues when the queues only had a journal file and no segment files. In practice it should be a rare event because it requires the queue (or maybe the node) to crash first and then the vhost or the node to be restarted gracefully.
-rw-r--r--deps/rabbit/BUILD.bazel2
-rw-r--r--deps/rabbit/src/rabbit_classic_queue_index_v2.erl2
-rw-r--r--deps/rabbit/test/classic_queue_prop_SUITE.erl47
3 files changed, 45 insertions, 6 deletions
diff --git a/deps/rabbit/BUILD.bazel b/deps/rabbit/BUILD.bazel
index c858cc3478..44a079f38f 100644
--- a/deps/rabbit/BUILD.bazel
+++ b/deps/rabbit/BUILD.bazel
@@ -320,7 +320,7 @@ suites = [
PACKAGE,
name = "classic_queue_prop_SUITE",
size = "large",
- shard_count = 4,
+ shard_count = 5,
sharding_method = "case",
deps = [
"@proper//:erlang_app",
diff --git a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl
index d5c8bc0dae..6f1a4885cd 100644
--- a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl
+++ b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl
@@ -1127,7 +1127,7 @@ queue_index_walker_reader(#resource{ virtual_host = VHost } = Name, Gatherer) ->
_ = [queue_index_walker_segment(filename:join(Dir, F), Gatherer) || F <- SegmentFiles],
%% When there are files belonging to the v1 index, we go through
%% the v1 index walker function as well.
- case rabbit_file:wildcard(".*\\.idx", Dir) of
+ case rabbit_file:wildcard(".*\\.(idx|jif)", Dir) of
[_|_] ->
%% This function will call gatherer:finish/1, we do not
%% need to call it here.
diff --git a/deps/rabbit/test/classic_queue_prop_SUITE.erl b/deps/rabbit/test/classic_queue_prop_SUITE.erl
index 61d97d5cd0..8814619b3d 100644
--- a/deps/rabbit/test/classic_queue_prop_SUITE.erl
+++ b/deps/rabbit/test/classic_queue_prop_SUITE.erl
@@ -74,7 +74,7 @@
%% Common Test.
all() ->
- [{group, classic_queue_tests}].
+ [{group, classic_queue_tests}, {group, classic_queue_regressions}].
groups() ->
[{classic_queue_tests, [], [
@@ -83,7 +83,11 @@ groups() ->
lazy_queue_v1,
classic_queue_v2,
lazy_queue_v2
- ]}].
+ ]},
+ {classic_queue_regressions, [], [
+ reg_v1_full_recover_only_journal
+ ]}
+ ].
init_per_suite(Config) ->
rabbit_ct_helpers:log_environment(),
@@ -92,7 +96,7 @@ init_per_suite(Config) ->
end_per_suite(Config) ->
rabbit_ct_helpers:run_teardown_steps(Config).
-init_per_group(Group = classic_queue_tests, Config) ->
+init_per_group(Group, Config) ->
Config1 = rabbit_ct_helpers:set_config(Config, [
{rmq_nodename_suffix, Group},
{rmq_nodes_count, 1},
@@ -117,7 +121,7 @@ init_per_group(Group = classic_queue_tests, Config) ->
erlang, system_flag, [backtrace_depth, 16]),
Config2.
-end_per_group(classic_queue_tests, Config) ->
+end_per_group(_, Config) ->
rabbit_ct_helpers:run_steps(Config,
rabbit_ct_client_helpers:teardown_steps() ++
rabbit_ct_broker_helpers:teardown_steps()).
@@ -1155,3 +1159,38 @@ queue_fold(Fun, Acc0, {R, F}) when is_function(Fun, 2), is_list(R), is_list(F) -
lists:foldr(Fun, Acc1, R);
queue_fold(Fun, Acc0, Q) ->
erlang:error(badarg, [Fun, Acc0, Q]).
+
+%% Regression tests.
+%%
+%% These tests are hard to reproduce by running the test suite normally
+%% because they require a very specific sequence of events.
+
+reg_v1_full_recover_only_journal(Config) ->
+ true = rabbit_ct_broker_helpers:rpc(Config, 0,
+ ?MODULE, do_reg_v1_full_recover_only_journal, [Config]).
+
+do_reg_v1_full_recover_only_journal(Config) ->
+
+ St0 = #cq{name=prop_classic_queue_v1, mode=lazy, version=1,
+ config=minimal_config(Config)},
+
+ Res1 = cmd_setup_queue(St0),
+ St3 = St0#cq{amq=Res1},
+
+ Res4 = cmd_channel_open(St3),
+ true = postcondition(St3, {call, undefined, cmd_channel_open, [St3]}, Res4),
+ St7 = next_state(St3, Res4, {call, undefined, cmd_channel_open, [St3]}),
+
+ Res8 = cmd_restart_queue_dirty(St7),
+ true = postcondition(St7, {call, undefined, cmd_restart_queue_dirty, [St7]}, Res8),
+ St11 = next_state(St7, Res8, {call, undefined, cmd_restart_queue_dirty, [St7]}),
+
+ Res12 = cmd_channel_publish_many(St11, Res4, 117, 4541, 2, true, undefined),
+ true = postcondition(St11, {call, undefined, cmd_channel_publish_many, [St11, Res4, 117, 4541, 2, true, undefined]}, Res12),
+ St14 = next_state(St11, Res12, {call, undefined, cmd_channel_publish_many, [St11, Res4, 117, 4541, 2, true, undefined]}),
+
+ Res15 = cmd_restart_vhost_clean(St14),
+ true = postcondition(St14, {call, undefined, cmd_restart_vhost_clean, [St14]}, Res15),
+ _ = next_state(St14, Res15, {call, undefined, cmd_restart_vhost_clean, [St14]}),
+
+ true.