author     Tony Sun <tony.sun427@gmail.com>    2022-01-18 17:37:22 -0800
committer  Tony Sun <tony.sun427@gmail.com>    2022-01-20 20:58:34 -0800
commit     8406e853747580e05580f67604fdc61567ec3ace
tree       704fa9cb31cf2585b19b8ff154ed7842a1c780fc
parent     55d8278ac405883d494f3f9dca46073035c8e308
Fix new_edits:false and VDU function_clause
In a rare, unsupported scenario, a user can call _bulk_docs with
new_edits:false against a database whose VDU forbids the update of
certain documents. When nodes are out of sync (under load, when
mem3_repl can't keep up), an update may or may not extend the
revision tree on a given node.
When two nodes extend the revision tree and forbid the update
while one node is behind, {error, W, [{Doc, FirstReply} | Acc]}
is returned instead of {ok, _} or {accepted, _}. The _bulk_docs
code path does not accept {error, _}, which leads to a
function_clause error.
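
As a minimal sketch of the failing pattern match (a hypothetical
simplification, not the actual chttpd handler):

    -module(bulk_docs_sketch).
    -export([bulk_docs_reply/1]).

    % Only ok/accepted results are matched, mirroring how the _bulk_docs
    % handler only expects those shapes; an {error, W, Acc} result from
    % fabric_doc_update has no matching clause, so calling this with one
    % fails with a function_clause error.
    bulk_docs_reply({ok, Results}) -> {status, 201, Results};
    bulk_docs_reply({accepted, Results}) -> {status, 202, Results}.
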
This fix changes the return value to:
{accepted, W, [{Doc, {forbidden, Msg}} | Acc]}
when any of the nodes forbids the update.
The single-document update path is also addressed, so if the same
issue occurs while updating one document, a 403 is still returned.
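
As an illustrative sketch of the single-document path after this
change (the wrapper name is hypothetical, and the internal
`replicated_changes` option is assumed to be how new_edits:false
reaches fabric):

    % Hypothetical wrapper, not CouchDB code: the new fabric:update_doc/3
    % clause turns this corner case into a thrown {forbidden, Msg}, which
    % an HTTP handler can map to a 403 like any other VDU rejection.
    handle_update(DbName, Doc) ->
        try fabric:update_doc(DbName, Doc, [replicated_changes]) of
            {ok, NewRev} -> {ok, NewRev};
            {accepted, NewRev} -> {accepted, NewRev}
        catch
            throw:{forbidden, Msg} -> {status, 403, Msg}
        end.
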
The fix should not affect replication, since the replicator
understands forbidden return values and ignores such updates
accordingly.
-rw-r--r--  src/fabric/src/fabric.erl            |   5
-rw-r--r--  src/fabric/src/fabric_doc_update.erl | 139
2 files changed, 137 insertions(+), 7 deletions(-)
diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl
index 5840cd63c..77ca8b7d9 100644
--- a/src/fabric/src/fabric.erl
+++ b/src/fabric/src/fabric.erl
@@ -326,6 +326,11 @@ update_doc(DbName, Doc, Options) ->
             {ok, NewRev};
         {accepted, [{accepted, NewRev}]} ->
             {accepted, NewRev};
+        % This is a corner case when we update one document with
+        % new_edits:false, the VDU forbids updating the document,
+        % and revision trees are out of sync across nodes.
+        {accepted, [{{_Id, _Rev}, {forbidden, Msg}}]} ->
+            throw({forbidden, Msg});
         {ok, [{{_Id, _Rev}, Error}]} ->
             throw(Error);
         {ok, [Error]} ->
diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl
index 62b639bab..7cb8efda5 100644
--- a/src/fabric/src/fabric_doc_update.erl
+++ b/src/fabric/src/fabric_doc_update.erl
@@ -157,16 +157,17 @@ force_reply(Doc, [FirstReply | _] = Replies, {Health, W, Acc}) ->
                 false ->
                     CounterKey = [fabric, doc_update, mismatched_errors],
                     couch_stats:increment_counter(CounterKey),
-                    {error, W, [{Doc, FirstReply} | Acc]}
+                    case check_forbidden_msg(Replies, Health) of
+                        {NewHealth, forbidden, Msg} ->
+                            {NewHealth, W, [{Doc, {forbidden, Msg}} | Acc]};
+                        false ->
+                            {error, W, [{Doc, FirstReply} | Acc]}
+                    end
             end;
         [AcceptedRev | _] ->
             CounterKey = [fabric, doc_update, write_quorum_errors],
             couch_stats:increment_counter(CounterKey),
-            NewHealth =
-                case Health of
-                    ok -> accepted;
-                    _ -> Health
-                end,
+            NewHealth = change_health_accepted(Health),
             {NewHealth, W, [{Doc, {accepted, AcceptedRev}} | Acc]}
     end
 end.
@@ -182,6 +183,35 @@ maybe_reply(Doc, Replies, {stop, W, Acc}) ->
             continue
     end.

+% This is a corner case where
+% 1) the revision trees for the document are out of sync across nodes
+% 2) the update on one node extends the revision tree
+% 3) the VDU forbids the document
+% 4) the remaining nodes do not extend the revision tree, so noreply is returned
+% If at least one node forbids the update, we reject the update
+% no matter what the other nodes return.
+check_forbidden_msg(Replies, Health) ->
+    Pred = fun(R) ->
+        case R of
+            {_, {forbidden, _}} ->
+                true;
+            _ ->
+                false
+        end
+    end,
+    case lists:search(Pred, Replies) of
+        {value, {_, {forbidden, Msg}}} ->
+            NewHealth = change_health_accepted(Health),
+            {NewHealth, forbidden, Msg};
+        false ->
+            false
+    end.
+
+change_health_accepted(ok) ->
+    accepted;
+change_health_accepted(Health) ->
+    Health.
+
 update_quorum_met(W, Replies) ->
     Counters = lists:foldl(
         fun(R, D) -> orddict:update_counter(R, 1, D) end,
@@ -255,6 +285,8 @@ validate_atomic_update(_DbName, AllDocs, true) ->
     throw({aborted, PreCommitFailures}).

 -ifdef(TEST).
+
+-define(NODEBUG, false).
 -include_lib("eunit/include/eunit.hrl").

 setup_all() ->
@@ -273,7 +305,10 @@ doc_update_test_() ->
         [
             fun doc_update1/0,
             fun doc_update2/0,
-            fun doc_update3/0
+            fun doc_update3/0,
+            fun one_forbid_test/0,
+            fun two_forbid_test/0,
+            fun extend_tree_forbid_test/0
         ]
     }.
@@ -406,6 +441,96 @@ doc_update3() ->

     ?assertEqual({ok, [{Doc1, {ok, Doc1}}, {Doc2, {ok, Doc2}}]}, Reply).
+one_forbid_test() ->
+    Doc1 = #doc{revs = {1, [<<"foo">>]}},
+    Doc2 = #doc{revs = {1, [<<"bar">>]}},
+    Docs = [Doc1, Doc2],
+    Shards =
+        mem3_util:create_partition_map("foo", 3, 1, ["node1", "node2", "node3"]),
+    GroupedDocs = group_docs_by_shard_hack(<<"foo">>, Shards, Docs),
+
+    Acc0 = {
+        length(Shards),
+        length(Docs),
+        list_to_integer("2"),
+        GroupedDocs,
+        dict:from_list([{Doc, []} || Doc <- Docs])
+    },
+
+    {ok, {WaitingCount1, _, _, _, _} = Acc1} =
+        handle_message({ok, [{ok, Doc1}, {noreply, Doc2}]}, hd(Shards), Acc0),
+    ?assertEqual(WaitingCount1, 2),
+
+    {ok, {WaitingCount2, _, _, _, _} = Acc2} =
+        handle_message({ok, [{ok, Doc1}, {noreply, Doc2}]}, lists:nth(2, Shards), Acc1),
+    ?assertEqual(WaitingCount2, 1),
+
+    {stop, Reply} =
+        handle_message({ok, [{ok, Doc1}, {Doc2, {forbidden, <<"not allowed">>}}]}, lists:nth(3, Shards), Acc2),
+
+    ?assertEqual({accepted, [{Doc1, {ok, Doc1}}, {Doc2, {forbidden, <<"not allowed">>}}]}, Reply).
+
+two_forbid_test() ->
+    Doc1 = #doc{revs = {1, [<<"foo">>]}},
+    Doc2 = #doc{revs = {1, [<<"bar">>]}},
+    Docs = [Doc1, Doc2],
+    Shards =
+        mem3_util:create_partition_map("foo", 3, 1, ["node1", "node2", "node3"]),
+    GroupedDocs = group_docs_by_shard_hack(<<"foo">>, Shards, Docs),
+
+    Acc0 = {
+        length(Shards),
+        length(Docs),
+        list_to_integer("2"),
+        GroupedDocs,
+        dict:from_list([{Doc, []} || Doc <- Docs])
+    },
+
+    {ok, {WaitingCount1, _, _, _, _} = Acc1} =
+        handle_message({ok, [{ok, Doc1}, {noreply, Doc2}]}, hd(Shards), Acc0),
+    ?assertEqual(WaitingCount1, 2),
+
+    {ok, {WaitingCount2, _, _, _, _} = Acc2} =
+        handle_message({ok, [{ok, Doc1}, {Doc2, {forbidden, <<"not allowed">>}}]}, lists:nth(2, Shards), Acc1),
+    ?assertEqual(WaitingCount2, 1),
+
+    {stop, Reply} =
+        handle_message({ok, [{ok, Doc1}, {Doc2, {forbidden, <<"not allowed">>}}]}, lists:nth(3, Shards), Acc2),
+
+    ?assertEqual({accepted, [{Doc1, {ok, Doc1}}, {Doc2, {forbidden, <<"not allowed">>}}]}, Reply).
+
+% This should actually never happen, because an `{ok, Doc}` message means that the
+% revision tree was extended, in which case the VDU should have forbidden the document.
+% Leaving this test here to make sure quorum rules still apply.
+extend_tree_forbid_test() ->
+    Doc1 = #doc{revs = {1, [<<"foo">>]}},
+    Doc2 = #doc{revs = {1, [<<"bar">>]}},
+    Docs = [Doc1, Doc2],
+    Shards =
+        mem3_util:create_partition_map("foo", 3, 1, ["node1", "node2", "node3"]),
+    GroupedDocs = group_docs_by_shard_hack(<<"foo">>, Shards, Docs),
+
+    Acc0 = {
+        length(Shards),
+        length(Docs),
+        list_to_integer("2"),
+        GroupedDocs,
+        dict:from_list([{Doc, []} || Doc <- Docs])
+    },
+
+    {ok, {WaitingCount1, _, _, _, _} = Acc1} =
+        handle_message({ok, [{ok, Doc1}, {ok, Doc2}]}, hd(Shards), Acc0),
+    ?assertEqual(WaitingCount1, 2),
+
+    {ok, {WaitingCount2, _, _, _, _} = Acc2} =
+        handle_message({ok, [{ok, Doc1}, {Doc2, {forbidden, <<"not allowed">>}}]}, lists:nth(2, Shards), Acc1),
+    ?assertEqual(WaitingCount2, 1),
+
+    {stop, Reply} =
+        handle_message({ok, [{ok, Doc1}, {ok, Doc2}]}, lists:nth(3, Shards), Acc2),
+
+    ?assertEqual({ok, [{Doc1, {ok, Doc1}}, {Doc2, {ok, Doc2}}]}, Reply).
+
 % needed for testing to avoid having to start the mem3 application
 group_docs_by_shard_hack(_DbName, Shards, Docs) ->
     dict:to_list(
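
To make the new helper's contract concrete, a usage sketch (check_forbidden_msg/2
is internal to fabric_doc_update, so this only runs inside the module; the doc/rev
atoms are placeholders):

    % Any single {forbidden, Msg} reply wins, and health is downgraded from
    % ok to accepted rather than error; with no forbidden reply the helper
    % returns false and force_reply keeps the original error path.
    {accepted, forbidden, <<"not allowed">>} =
        check_forbidden_msg([{doc1, {ok, rev1}}, {doc2, {forbidden, <<"not allowed">>}}], ok),
    false = check_forbidden_msg([{doc1, {ok, rev1}}], ok).

The new test cases run with the module's existing eunit fixture, e.g.
eunit:test(fabric_doc_update, [verbose]).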