summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTony Sun <tony.sun427@gmail.com>2022-01-18 17:37:22 -0800
committerTony Sun <tony.sun427@gmail.com>2022-01-20 20:58:34 -0800
commit8406e853747580e05580f67604fdc61567ec3ace (patch)
tree704fa9cb31cf2585b19b8ff154ed7842a1c780fc
parent55d8278ac405883d494f3f9dca46073035c8e308 (diff)
downloadcouchdb-new-edits-false-bug.tar.gz
Fix new_edits:false and VDU function_clausenew-edits-false-bug
In a rare, unsupported scenario, a user can call _bulk_docs with new_edits:false and a VDU that denies the update of certain documents. When nodes are out of sync (under load and mem3_repl can't keep up), an update may or may not update the revision tree. When two nodes extend the revision tree and forbids the update, but one node is behind, an {error, W, [{Doc, FirstReply} | Acc]} is returned instead of {ok, _} or {accepted, _}. The _bulk_docs request does not accept {error, _} which leads to a function_clause. This fix changes the return value to: {accepted, W, [{Doc, {forbidden, Msg}} | Acc]} when any of the nodes forbids the update. Updating one document is also addressed so if the same issue occurs during one document update, a 403 is still returned. This should not affect replication since replication understands forbidden return values and should ignore the update respectively.
-rw-r--r--src/fabric/src/fabric.erl5
-rw-r--r--src/fabric/src/fabric_doc_update.erl139
2 files changed, 137 insertions, 7 deletions
diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl
index 5840cd63c..77ca8b7d9 100644
--- a/src/fabric/src/fabric.erl
+++ b/src/fabric/src/fabric.erl
@@ -326,6 +326,11 @@ update_doc(DbName, Doc, Options) ->
{ok, NewRev};
{accepted, [{accepted, NewRev}]} ->
{accepted, NewRev};
+ % This is a corner case when we update one document when
+ % news_edits:false, VDU forbids updating the document,
+ % and revision trees are out of sync across nodes.
+ {accepted, [{{_Id, _Rev}, {forbidden, Msg}}]} ->
+ throw({forbidden, Msg});
{ok, [{{_Id, _Rev}, Error}]} ->
throw(Error);
{ok, [Error]} ->
diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl
index 62b639bab..7cb8efda5 100644
--- a/src/fabric/src/fabric_doc_update.erl
+++ b/src/fabric/src/fabric_doc_update.erl
@@ -157,16 +157,17 @@ force_reply(Doc, [FirstReply | _] = Replies, {Health, W, Acc}) ->
false ->
CounterKey = [fabric, doc_update, mismatched_errors],
couch_stats:increment_counter(CounterKey),
- {error, W, [{Doc, FirstReply} | Acc]}
+ case check_forbidden_msg(Replies, Health) of
+ {NewHealth, forbidden, Msg} ->
+ {NewHealth, W, [{Doc, {forbidden, Msg}} | Acc]};
+ false ->
+ {error, W, [{Doc, FirstReply} | Acc]}
+ end
end;
[AcceptedRev | _] ->
CounterKey = [fabric, doc_update, write_quorum_errors],
couch_stats:increment_counter(CounterKey),
- NewHealth =
- case Health of
- ok -> accepted;
- _ -> Health
- end,
+ NewHealth = change_health_accepted(Health),
{NewHealth, W, [{Doc, {accepted, AcceptedRev}} | Acc]}
end
end.
@@ -182,6 +183,35 @@ maybe_reply(Doc, Replies, {stop, W, Acc}) ->
continue
end.
+% This is a corner case where
+% 1) revision tree for the document are out of sync across nodes
+% 2) update on one node extends the revision tree
+% 3) VDU forbids the document
+% 4) remaining nodes do not extend revision tree, so noreply is returned
+% If at at least one nodes forbids the update, then we reject
+% the update no matter what other nodes return.
+check_forbidden_msg(Replies, Health) ->
+ Pred = fun(R) ->
+ case R of
+ {_, {forbidden, _}} ->
+ true;
+ _ ->
+ false
+ end
+ end,
+ case lists:search(Pred, Replies) of
+ {value, {_, {forbidden, Msg}}} ->
+ NewHealth = change_health_accepted(Health),
+ {NewHealth, forbidden, Msg};
+ false ->
+ false
+ end.
+
+change_health_accepted(ok) ->
+ accepted;
+change_health_accepted(Health) ->
+ Health.
+
update_quorum_met(W, Replies) ->
Counters = lists:foldl(
fun(R, D) -> orddict:update_counter(R, 1, D) end,
@@ -255,6 +285,8 @@ validate_atomic_update(_DbName, AllDocs, true) ->
throw({aborted, PreCommitFailures}).
-ifdef(TEST).
+
+-define(NODEBUG, false).
-include_lib("eunit/include/eunit.hrl").
setup_all() ->
@@ -273,7 +305,10 @@ doc_update_test_() ->
[
fun doc_update1/0,
fun doc_update2/0,
- fun doc_update3/0
+ fun doc_update3/0,
+ fun one_forbid_test/0,
+ fun two_forbid_test/0,
+ fun extend_tree_forbid_test/0
]
}.
@@ -406,6 +441,96 @@ doc_update3() ->
?assertEqual({ok, [{Doc1, {ok, Doc1}}, {Doc2, {ok, Doc2}}]}, Reply).
+one_forbid_test() ->
+ Doc1 = #doc{revs = {1, [<<"foo">>]}},
+ Doc2 = #doc{revs = {1, [<<"bar">>]}},
+ Docs = [Doc1, Doc2],
+ Shards =
+ mem3_util:create_partition_map("foo", 3, 1, ["node1", "node2", "node3"]),
+ GroupedDocs = group_docs_by_shard_hack(<<"foo">>, Shards, Docs),
+
+ Acc0 = {
+ length(Shards),
+ length(Docs),
+ list_to_integer("2"),
+ GroupedDocs,
+ dict:from_list([{Doc, []} || Doc <- Docs])
+ },
+
+ {ok, {WaitingCount1, _, _, _, _} = Acc1} =
+ handle_message({ok, [{ok, Doc1}, {noreply, Doc2}]}, hd(Shards), Acc0),
+ ?assertEqual(WaitingCount1, 2),
+
+ {ok, {WaitingCount2, _, _, _, _} = Acc2} =
+ handle_message({ok, [{ok, Doc1}, {noreply, Doc2}]}, lists:nth(2, Shards), Acc1),
+ ?assertEqual(WaitingCount2, 1),
+
+ {stop, Reply} =
+ handle_message({ok, [{ok, Doc1}, {Doc2, {forbidden, <<"not allowed">>}}]}, lists:nth(3, Shards), Acc2),
+
+ ?assertEqual({accepted, [{Doc1, {ok, Doc1}}, {Doc2, {forbidden, <<"not allowed">>}}]}, Reply).
+
+two_forbid_test() ->
+ Doc1 = #doc{revs = {1, [<<"foo">>]}},
+ Doc2 = #doc{revs = {1, [<<"bar">>]}},
+ Docs = [Doc1, Doc2],
+ Shards =
+ mem3_util:create_partition_map("foo", 3, 1, ["node1", "node2", "node3"]),
+ GroupedDocs = group_docs_by_shard_hack(<<"foo">>, Shards, Docs),
+
+ Acc0 = {
+ length(Shards),
+ length(Docs),
+ list_to_integer("2"),
+ GroupedDocs,
+ dict:from_list([{Doc, []} || Doc <- Docs])
+ },
+
+ {ok, {WaitingCount1, _, _, _, _} = Acc1} =
+ handle_message({ok, [{ok, Doc1}, {noreply, Doc2}]}, hd(Shards), Acc0),
+ ?assertEqual(WaitingCount1, 2),
+
+ {ok, {WaitingCount2, _, _, _, _} = Acc2} =
+ handle_message({ok, [{ok, Doc1}, {Doc2, {forbidden, <<"not allowed">>}}]}, lists:nth(2, Shards), Acc1),
+ ?assertEqual(WaitingCount2, 1),
+
+ {stop, Reply} =
+ handle_message({ok, [{ok, Doc1}, {Doc2, {forbidden, <<"not allowed">>}}]}, lists:nth(3, Shards), Acc2),
+
+ ?assertEqual({accepted, [{Doc1, {ok, Doc1}}, {Doc2, {forbidden, <<"not allowed">>}}]}, Reply).
+
+% This should actually never happen, because an `{ok, Doc}` message means that the revision
+% tree is extended and so the VDU should forbid the document.
+% Leaving this test here to make sure quorum rules still apply.
+extend_tree_forbid_test() ->
+ Doc1 = #doc{revs = {1, [<<"foo">>]}},
+ Doc2 = #doc{revs = {1, [<<"bar">>]}},
+ Docs = [Doc1, Doc2],
+ Shards =
+ mem3_util:create_partition_map("foo", 3, 1, ["node1", "node2", "node3"]),
+ GroupedDocs = group_docs_by_shard_hack(<<"foo">>, Shards, Docs),
+
+ Acc0 = {
+ length(Shards),
+ length(Docs),
+ list_to_integer("2"),
+ GroupedDocs,
+ dict:from_list([{Doc, []} || Doc <- Docs])
+ },
+
+ {ok, {WaitingCount1, _, _, _, _} = Acc1} =
+ handle_message({ok, [{ok, Doc1}, {ok, Doc2}]}, hd(Shards), Acc0),
+ ?assertEqual(WaitingCount1, 2),
+
+ {ok, {WaitingCount2, _, _, _, _} = Acc2} =
+ handle_message({ok, [{ok, Doc1}, {Doc2, {forbidden, <<"not allowed">>}}]}, lists:nth(2, Shards), Acc1),
+ ?assertEqual(WaitingCount2, 1),
+
+ {stop, Reply} =
+ handle_message({ok, [{ok, Doc1}, {ok, Doc2}]}, lists:nth(3, Shards), Acc2),
+
+ ?assertEqual({ok, [{Doc1, {ok, Doc1}}, {Doc2, {ok, Doc2}}]}, Reply).
+
% needed for testing to avoid having to start the mem3 application
group_docs_by_shard_hack(_DbName, Shards, Docs) ->
dict:to_list(