diff options
author | Nick Vatamaniuc <vatamane@gmail.com> | 2022-06-14 15:47:44 -0400 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2022-06-17 14:48:30 -0400 |
commit | 63e09832a1af0c6980fafff04b1d7082267e586b (patch) | |
tree | 2e57504b1c439eeb38bf9b35155d71da88bacd7a | |
parent | 687b4e023587b1669bb506f923a094d7c3f81a65 (diff) | |
download | couchdb-63e09832a1af0c6980fafff04b1d7082267e586b.tar.gz |
Ignore repeated interactive purges
After enabling default purge request replication, it was possible to get a
`badreq` error because the internal replicator would race with the interactive
worker updates. Since we're dealing with cluster-generated UUIDs if we see the
same UUID, DocId, and Revs again, assume it was applied already and ignore it.
For belt-and-suspenders protection ensure that revs lists don't have duplicates
and are sorted. Do it both in fabric and couch_db in case an operator wants to
do local purges. For extra safety also keep sorting the revs in the duplicate
check for the rare case that the existing purge btree already contain duplicate
revs. Eventually that check could be removed as all the new purge revs lists
would be de-duplicated and sorted.
As a bonus, if it turns out all the purges were applied simply return and avoid
calling the updater process which can be an expensive bottleneck.
-rw-r--r-- | src/couch/src/couch_db.erl | 59 | ||||
-rw-r--r-- | src/couch_pse_tests/src/cpse_test_purge_docs.erl | 15 | ||||
-rw-r--r-- | src/fabric/src/fabric_doc_purge.erl | 2 |
3 files changed, 47 insertions, 29 deletions
diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 70ba1c2b9..caf8157a0 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -397,34 +397,43 @@ when PurgeOption :: interactive_edit | replicated_changes, Reply :: {ok, []} | {ok, [Rev]}. purge_docs(#db{main_pid = Pid} = Db, UUIDsIdsRevs, Options) -> - UUIDsIdsRevs2 = [ - {UUID, couch_util:to_binary(Id), Revs} + UUIDsIdsRevs1 = [ + {UUID, couch_util:to_binary(Id), lists:usort(Revs)} || {UUID, Id, Revs} <- UUIDsIdsRevs ], - % Check here if any UUIDs already exist when - % we're not replicating purge infos - IsRepl = lists:member(replicated_changes, Options), - if - IsRepl -> - ok; - true -> - UUIDs = [UUID || {UUID, _, _} <- UUIDsIdsRevs2], - lists:foreach( - fun(Resp) -> - if - Resp == not_found -> - ok; - true -> - Fmt = "Duplicate purge info UIUD: ~s", - Reason = io_lib:format(Fmt, [element(2, Resp)]), - throw({badreq, Reason}) - end - end, - get_purge_infos(Db, UUIDs) - ) + % Gather any existing purges with the same UUIDs + UUIDs = element(1, lists:unzip3(UUIDsIdsRevs1)), + Old1 = get_purge_infos(Db, UUIDs), + Old2 = maps:from_list([{UUID, {Id, Revs}} || {_, UUID, Id, Revs} <- Old1]), + % Filter out all the purges which have already been processed + FilterCheckFun = fun({UUID, Id, Revs}) -> + case maps:is_key(UUID, Old2) of + true -> + #{UUID := {OldId, OldRevs}} = Old2, + case Id =:= OldId andalso lists:usort(Revs) =:= lists:usort(OldRevs) of + true -> + % Processed this request already, so filter it out + false; + false -> + % Oops! Same UUID but somehow different Id or Revs + Fmt = "Duplicate purge info UUID: ~s DocId: ~s", + Reason = lists:flatten(io_lib:format(Fmt, [UUID, Id])), + throw({badreq, Reason}) + end; + false -> + % Have not seen this request yet, so we keep it + true + end end, - increment_stat(Db, [couchdb, database_purges]), - gen_server:call(Pid, {purge_docs, UUIDsIdsRevs2, Options}). + UUIDsIdsRevs2 = lists:filter(FilterCheckFun, UUIDsIdsRevs1), + % If all the purges are already applied, skip calling the updater + case UUIDsIdsRevs2 of + [] -> + {ok, []}; + [_ | _] -> + increment_stat(Db, [couchdb, database_purges]), + gen_server:call(Pid, {purge_docs, UUIDsIdsRevs2, Options}) + end. -spec get_purge_infos(#db{}, [UUId]) -> [PurgeInfo] when UUId :: binary(), diff --git a/src/couch_pse_tests/src/cpse_test_purge_docs.erl b/src/couch_pse_tests/src/cpse_test_purge_docs.erl index f0ed3d747..cef439031 100644 --- a/src/couch_pse_tests/src/cpse_test_purge_docs.erl +++ b/src/couch_pse_tests/src/cpse_test_purge_docs.erl @@ -427,15 +427,24 @@ cpse_purge_repeated_uuid(DbName) -> {purge_infos, []} ]), + UUID = cpse_util:uuid(), + PurgeInfos = [ - {cpse_util:uuid(), <<"foo1">>, [Rev]} + {UUID, <<"foo1">>, [Rev]} ], {ok, [{ok, PRevs1}]} = cpse_util:purge(DbName, PurgeInfos), ?assertEqual([Rev], PRevs1), - % Attempting to purge a repeated UUID is an error - ?assertThrow({badreq, _}, cpse_util:purge(DbName, PurgeInfos)), + % Attempting to purge a repeated UUID is a no-op, it could have + % been a race with the internal replicator + ?assertEqual({ok, []}, cpse_util:purge(DbName, PurgeInfos)), + + % Purging a repeated UUID with a different Id or Revs is an error + DifferentId = [{UUID, <<"foo2">>, [Rev]}], + ?assertThrow({badreq, _}, cpse_util:purge(DbName, DifferentId)), + DifferentRevs = [{UUID, <<"foo1">>, [Rev, <<"another">>]}], + ?assertThrow({badreq, _}, cpse_util:purge(DbName, DifferentRevs)), % Although we can replicate it in {ok, []} = cpse_util:purge(DbName, PurgeInfos, [replicated_changes]), diff --git a/src/fabric/src/fabric_doc_purge.erl b/src/fabric/src/fabric_doc_purge.erl index 64221ab0e..082666afb 100644 --- a/src/fabric/src/fabric_doc_purge.erl +++ b/src/fabric/src/fabric_doc_purge.erl @@ -132,7 +132,7 @@ create_reqs([], UUIDs, Reqs) -> create_reqs([{Id, Revs} | RestIdsRevs], UUIDs, Reqs) -> UUID = couch_uuids:new(), NewUUIDs = [UUID | UUIDs], - NewReqs = [{UUID, Id, Revs} | Reqs], + NewReqs = [{UUID, Id, lists:usort(Revs)} | Reqs], create_reqs(RestIdsRevs, NewUUIDs, NewReqs). group_reqs_by_shard(DbName, Reqs) -> |