CQv1: Fix failure to recover messages in rare cases

During a full recovery, messages could be lost for v1 queues that had only a journal file and no segment files. In practice this should be a rare event, because it requires the queue (or possibly the node) to crash first, and then the vhost or the node to be restarted gracefully.
author: Loïc Hoguin <lhoguin@vmware.com> 2022-08-04 12:30:38 +0200
committer: Loïc Hoguin <lhoguin@vmware.com> 2022-08-04 13:50:12 +0200
commit: 744e66e42ae9fd1d8cecf3700773968d925b98bd (patch)
tree: 7d2f9c279881e3ef79bff1a36f401676fa38e593
parent: 4faec42412d499cde370e6ebd680858eeeda7452 (diff)
download: rabbitmq-server-git-744e66e42ae9fd1d8cecf3700773968d925b98bd.tar.gz
3 files changed, 45 insertions, 6 deletions
diff --git a/deps/rabbit/BUILD.bazel b/deps/rabbit/BUILD.bazel
index c858cc3478..44a079f38f 100644
--- a/deps/rabbit/BUILD.bazel
+++ b/deps/rabbit/BUILD.bazel
@@ -320,7 +320,7 @@ suites = [
         PACKAGE,
         name = "classic_queue_prop_SUITE",
         size = "large",
-        shard_count = 4,
+        shard_count = 5,
         sharding_method = "case",
         deps = [
             "@proper//:erlang_app",
diff --git a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl
index d5c8bc0dae..6f1a4885cd 100644
--- a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl
+++ b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl
@@ -1127,7 +1127,7 @@ queue_index_walker_reader(#resource{ virtual_host = VHost } = Name, Gatherer) ->
     _ = [queue_index_walker_segment(filename:join(Dir, F), Gatherer) || F <- SegmentFiles],
     %% When there are files belonging to the v1 index, we go through
     %% the v1 index walker function as well.
-    case rabbit_file:wildcard(".*\\.idx", Dir) of
+    case rabbit_file:wildcard(".*\\.(idx|jif)", Dir) of
         [_|_] ->
             %% This function will call gatherer:finish/1, we do not
             %% need to call it here.
diff --git a/deps/rabbit/test/classic_queue_prop_SUITE.erl b/deps/rabbit/test/classic_queue_prop_SUITE.erl
index 61d97d5cd0..8814619b3d 100644
--- a/deps/rabbit/test/classic_queue_prop_SUITE.erl
+++ b/deps/rabbit/test/classic_queue_prop_SUITE.erl
@@ -74,7 +74,7 @@
 %% Common Test.
 
 all() ->
-    [{group, classic_queue_tests}].
+    [{group, classic_queue_tests}, {group, classic_queue_regressions}].
 
 groups() ->
     [{classic_queue_tests, [], [
@@ -83,7 +83,11 @@ groups() ->
         lazy_queue_v1,
         classic_queue_v2,
         lazy_queue_v2
-    ]}].
+     ]},
+     {classic_queue_regressions, [], [
+        reg_v1_full_recover_only_journal
+     ]}
+    ].
 
 init_per_suite(Config) ->
     rabbit_ct_helpers:log_environment(),
@@ -92,7 +96,7 @@ init_per_suite(Config) ->
 end_per_suite(Config) ->
     rabbit_ct_helpers:run_teardown_steps(Config).
 
-init_per_group(Group = classic_queue_tests, Config) ->
+init_per_group(Group, Config) ->
     Config1 = rabbit_ct_helpers:set_config(Config, [
         {rmq_nodename_suffix, Group},
         {rmq_nodes_count, 1},
@@ -117,7 +121,7 @@ init_per_group(Group = classic_queue_tests, Config) ->
         erlang, system_flag, [backtrace_depth, 16]),
     Config2.
 
-end_per_group(classic_queue_tests, Config) ->
+end_per_group(_, Config) ->
     rabbit_ct_helpers:run_steps(Config,
       rabbit_ct_client_helpers:teardown_steps() ++
       rabbit_ct_broker_helpers:teardown_steps()).
@@ -1155,3 +1159,38 @@ queue_fold(Fun, Acc0, {R, F}) when is_function(Fun, 2), is_list(R), is_list(F) -
     lists:foldr(Fun, Acc1, R);
 queue_fold(Fun, Acc0, Q) ->
     erlang:error(badarg, [Fun, Acc0, Q]).
+
+%% Regression tests.
+%%
+%% These tests are hard to reproduce by running the test suite normally
+%% because they require a very specific sequence of events.
+
+reg_v1_full_recover_only_journal(Config) ->
+    true = rabbit_ct_broker_helpers:rpc(Config, 0,
+        ?MODULE, do_reg_v1_full_recover_only_journal, [Config]).
+
+do_reg_v1_full_recover_only_journal(Config) ->
+
+    St0 = #cq{name=prop_classic_queue_v1, mode=lazy, version=1,
+              config=minimal_config(Config)},
+
+    Res1 = cmd_setup_queue(St0),
+    St3 = St0#cq{amq=Res1},
+
+    Res4 = cmd_channel_open(St3),
+    true = postcondition(St3, {call, undefined, cmd_channel_open, [St3]}, Res4),
+    St7 = next_state(St3, Res4, {call, undefined, cmd_channel_open, [St3]}),
+
+    Res8 = cmd_restart_queue_dirty(St7),
+    true = postcondition(St7, {call, undefined, cmd_restart_queue_dirty, [St7]}, Res8),
+    St11 = next_state(St7, Res8, {call, undefined, cmd_restart_queue_dirty, [St7]}),
+
+    Res12 = cmd_channel_publish_many(St11, Res4, 117, 4541, 2, true, undefined),
+    true = postcondition(St11, {call, undefined, cmd_channel_publish_many, [St11, Res4, 117, 4541, 2, true, undefined]}, Res12),
+    St14 = next_state(St11, Res12, {call, undefined, cmd_channel_publish_many, [St11, Res4, 117, 4541, 2, true, undefined]}),
+
+    Res15 = cmd_restart_vhost_clean(St14),
+    true = postcondition(St14, {call, undefined, cmd_restart_vhost_clean, [St14]}, Res15),
+    _ = next_state(St14, Res15, {call, undefined, cmd_restart_vhost_clean, [St14]}),
+
+    true.
author	Loïc Hoguin <lhoguin@vmware.com>	2022-08-04 12:30:38 +0200
committer	Loïc Hoguin <lhoguin@vmware.com>	2022-08-04 13:50:12 +0200
commit	744e66e42ae9fd1d8cecf3700773968d925b98bd (patch)
tree	7d2f9c279881e3ef79bff1a36f401676fa38e593
parent	4faec42412d499cde370e6ebd680858eeeda7452 (diff)
download	rabbitmq-server-git-744e66e42ae9fd1d8cecf3700773968d925b98bd.tar.gz