summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2022-06-14 15:47:44 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2022-06-17 14:48:30 -0400
commit63e09832a1af0c6980fafff04b1d7082267e586b (patch)
tree2e57504b1c439eeb38bf9b35155d71da88bacd7a
parent687b4e023587b1669bb506f923a094d7c3f81a65 (diff)
downloadcouchdb-63e09832a1af0c6980fafff04b1d7082267e586b.tar.gz
Ignore repeated interactive purges
After enabling default purge request replication, it was possible to get a `badreq` error because the internal replicator would race with the interactive worker updates. Since we're dealing with cluster-generated UUIDs if we see the same UUID, DocId, and Revs again, assume it was applied already and ignore it. For belt-and-suspenders protection ensure that revs lists don't have duplicates and are sorted. Do it both in fabric and couch_db in case an operator wants to do local purges. For extra safety also keep sorting the revs in the duplicate check for the rare case that the existing purge btree already contain duplicate revs. Eventually that check could be removed as all the new purge revs lists would be de-duplicated and sorted. As a bonus, if it turns out all the purges were applied simply return and avoid calling the updater process which can be an expensive bottleneck.
-rw-r--r--src/couch/src/couch_db.erl59
-rw-r--r--src/couch_pse_tests/src/cpse_test_purge_docs.erl15
-rw-r--r--src/fabric/src/fabric_doc_purge.erl2
3 files changed, 47 insertions, 29 deletions
diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index 70ba1c2b9..caf8157a0 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -397,34 +397,43 @@ when
PurgeOption :: interactive_edit | replicated_changes,
Reply :: {ok, []} | {ok, [Rev]}.
purge_docs(#db{main_pid = Pid} = Db, UUIDsIdsRevs, Options) ->
- UUIDsIdsRevs2 = [
- {UUID, couch_util:to_binary(Id), Revs}
+ UUIDsIdsRevs1 = [
+ {UUID, couch_util:to_binary(Id), lists:usort(Revs)}
|| {UUID, Id, Revs} <- UUIDsIdsRevs
],
- % Check here if any UUIDs already exist when
- % we're not replicating purge infos
- IsRepl = lists:member(replicated_changes, Options),
- if
- IsRepl ->
- ok;
- true ->
- UUIDs = [UUID || {UUID, _, _} <- UUIDsIdsRevs2],
- lists:foreach(
- fun(Resp) ->
- if
- Resp == not_found ->
- ok;
- true ->
- Fmt = "Duplicate purge info UIUD: ~s",
- Reason = io_lib:format(Fmt, [element(2, Resp)]),
- throw({badreq, Reason})
- end
- end,
- get_purge_infos(Db, UUIDs)
- )
+ % Gather any existing purges with the same UUIDs
+ UUIDs = element(1, lists:unzip3(UUIDsIdsRevs1)),
+ Old1 = get_purge_infos(Db, UUIDs),
+ Old2 = maps:from_list([{UUID, {Id, Revs}} || {_, UUID, Id, Revs} <- Old1]),
+ % Filter out all the purges which have already been processed
+ FilterCheckFun = fun({UUID, Id, Revs}) ->
+ case maps:is_key(UUID, Old2) of
+ true ->
+ #{UUID := {OldId, OldRevs}} = Old2,
+ case Id =:= OldId andalso lists:usort(Revs) =:= lists:usort(OldRevs) of
+ true ->
+ % Processed this request already, so filter it out
+ false;
+ false ->
+ % Oops! Same UUID but somehow different Id or Revs
+ Fmt = "Duplicate purge info UUID: ~s DocId: ~s",
+ Reason = lists:flatten(io_lib:format(Fmt, [UUID, Id])),
+ throw({badreq, Reason})
+ end;
+ false ->
+ % Have not seen this request yet, so we keep it
+ true
+ end
end,
- increment_stat(Db, [couchdb, database_purges]),
- gen_server:call(Pid, {purge_docs, UUIDsIdsRevs2, Options}).
+ UUIDsIdsRevs2 = lists:filter(FilterCheckFun, UUIDsIdsRevs1),
+ % If all the purges are already applied, skip calling the updater
+ case UUIDsIdsRevs2 of
+ [] ->
+ {ok, []};
+ [_ | _] ->
+ increment_stat(Db, [couchdb, database_purges]),
+ gen_server:call(Pid, {purge_docs, UUIDsIdsRevs2, Options})
+ end.
-spec get_purge_infos(#db{}, [UUId]) -> [PurgeInfo] when
UUId :: binary(),
diff --git a/src/couch_pse_tests/src/cpse_test_purge_docs.erl b/src/couch_pse_tests/src/cpse_test_purge_docs.erl
index f0ed3d747..cef439031 100644
--- a/src/couch_pse_tests/src/cpse_test_purge_docs.erl
+++ b/src/couch_pse_tests/src/cpse_test_purge_docs.erl
@@ -427,15 +427,24 @@ cpse_purge_repeated_uuid(DbName) ->
{purge_infos, []}
]),
+ UUID = cpse_util:uuid(),
+
PurgeInfos = [
- {cpse_util:uuid(), <<"foo1">>, [Rev]}
+ {UUID, <<"foo1">>, [Rev]}
],
{ok, [{ok, PRevs1}]} = cpse_util:purge(DbName, PurgeInfos),
?assertEqual([Rev], PRevs1),
- % Attempting to purge a repeated UUID is an error
- ?assertThrow({badreq, _}, cpse_util:purge(DbName, PurgeInfos)),
+ % Attempting to purge a repeated UUID is a no-op, it could have
+ % been a race with the internal replicator
+ ?assertEqual({ok, []}, cpse_util:purge(DbName, PurgeInfos)),
+
+ % Purging a repeated UUID with a different Id or Revs is an error
+ DifferentId = [{UUID, <<"foo2">>, [Rev]}],
+ ?assertThrow({badreq, _}, cpse_util:purge(DbName, DifferentId)),
+ DifferentRevs = [{UUID, <<"foo1">>, [Rev, <<"another">>]}],
+ ?assertThrow({badreq, _}, cpse_util:purge(DbName, DifferentRevs)),
% Although we can replicate it in
{ok, []} = cpse_util:purge(DbName, PurgeInfos, [replicated_changes]),
diff --git a/src/fabric/src/fabric_doc_purge.erl b/src/fabric/src/fabric_doc_purge.erl
index 64221ab0e..082666afb 100644
--- a/src/fabric/src/fabric_doc_purge.erl
+++ b/src/fabric/src/fabric_doc_purge.erl
@@ -132,7 +132,7 @@ create_reqs([], UUIDs, Reqs) ->
create_reqs([{Id, Revs} | RestIdsRevs], UUIDs, Reqs) ->
UUID = couch_uuids:new(),
NewUUIDs = [UUID | UUIDs],
- NewReqs = [{UUID, Id, Revs} | Reqs],
+ NewReqs = [{UUID, Id, lists:usort(Revs)} | Reqs],
create_reqs(RestIdsRevs, NewUUIDs, NewReqs).
group_reqs_by_shard(DbName, Reqs) ->