summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2022-01-07 14:07:55 -0500
committerNick Vatamaniuc <nickva@users.noreply.github.com>2022-01-07 17:16:46 -0500
commit3ae8c035448fe7a032d9459d07f8b04ed5e99f74 (patch)
tree5f4ef84d7967e23a2f91b6b2ec21e9d14dd1f68b
parent186818c9f69b08508fdcc9bc42b38108e7508c78 (diff)
downloadcouchdb-3ae8c035448fe7a032d9459d07f8b04ed5e99f74.tar.gz
Fix changes feed rewinds after a shard move with no subsequent db updates
Previously, when a database shard was moved to a new node and there were no subsequent updates, the changes feed sequence would rewind to the previous epoch. In the case of a first shard move, it would rewind to 0. To fix the issue, update the `owner_of/2` and `start_seq/3` functions to account for the case where the epoch sequence exactly matches the current db update sequence. Fixes #3885
-rw-r--r--src/couch/src/couch_db.erl14
1 files changed, 10 insertions, 4 deletions
diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index 18ef9c998..fa003e16d 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -1793,15 +1793,15 @@ owner_of(Db, Seq) when not is_list(Db) ->
owner_of(get_epochs(Db), Seq);
owner_of([], _Seq) ->
undefined;
-owner_of([{EpochNode, EpochSeq} | _Rest], Seq) when Seq > EpochSeq ->
+owner_of([{EpochNode, EpochSeq} | _Rest], Seq) when Seq >= EpochSeq ->
EpochNode;
owner_of([_ | Rest], Seq) ->
owner_of(Rest, Seq).
-start_seq([{OrigNode, EpochSeq} | _], OrigNode, Seq) when Seq > EpochSeq ->
+start_seq([{OrigNode, EpochSeq} | _], OrigNode, Seq) when Seq >= EpochSeq ->
%% OrigNode is the owner of the Seq so we can safely stream from there
Seq;
-start_seq([{_, NewSeq}, {OrigNode, _} | _], OrigNode, Seq) when Seq > NewSeq ->
+start_seq([{_, NewSeq}, {OrigNode, _} | _], OrigNode, Seq) when Seq >= NewSeq ->
%% We transferred this file before Seq was written on OrigNode, so we need
%% to stream from the beginning of the next epoch. Note that it is _not_
%% necessary for the current node to own the epoch beginning at NewSeq
@@ -2314,6 +2314,8 @@ t_calculate_start_seq_shard_move() ->
Db = test_util:fake_db([]),
% Sequence when shard was on node1
?assertEqual(2, calculate_start_seq(Db, node1, {2, <<"foo">>})),
+ % Shard moved to node2 with no other updates after the move to node2
+ ?assertEqual(10, calculate_start_seq(Db, node2, {10, <<"foo">>})),
% Sequence from node1 after the move happened, we reset back to the
% start of the epoch on node2 = 10
?assertEqual(10, calculate_start_seq(Db, node1, {16, <<"foo">>})),
@@ -2323,11 +2325,15 @@ t_calculate_start_seq_shard_move() ->
is_owner_test() ->
?assertNot(is_owner(foo, 1, [])),
- ?assertNot(is_owner(foo, 1, [{foo, 1}])),
+ ?assertNot(is_owner(foo, 1, [{foo, 2}])),
+ ?assert(is_owner(foo, 1, [{foo, 1}])),
?assert(is_owner(foo, 2, [{foo, 1}])),
?assert(is_owner(foo, 50, [{bar, 100}, {foo, 1}])),
?assert(is_owner(foo, 50, [{baz, 200}, {bar, 100}, {foo, 1}])),
?assert(is_owner(bar, 150, [{baz, 200}, {bar, 100}, {foo, 1}])),
+ ?assert(is_owner(bar, 100, [{baz, 200}, {bar, 100}, {foo, 1}])),
+ ?assertNot(is_owner(bar, 99, [{baz, 200}, {bar, 100}, {foo, 1}])),
+ ?assertNot(is_owner(baz, 199, [{baz, 200}, {bar, 100}, {foo, 1}])),
?assertError(duplicate_epoch, validate_epochs([{foo, 1}, {bar, 1}])),
?assertError(epoch_order, validate_epochs([{foo, 100}, {bar, 200}])).