summaryrefslogtreecommitdiff
path: root/lib/stdlib/src/sets.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib/src/sets.erl')
-rw-r--r--lib/stdlib/src/sets.erl71
1 files changed, 40 insertions, 31 deletions
diff --git a/lib/stdlib/src/sets.erl b/lib/stdlib/src/sets.erl
index bdc0ed40f3..dccc6dcf3a 100644
--- a/lib/stdlib/src/sets.erl
+++ b/lib/stdlib/src/sets.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2000-2021. All Rights Reserved.
+%% Copyright Ericsson AB 2000-2023. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -89,12 +89,12 @@
%%------------------------------------------------------------------------------
%% new() -> Set
--spec new() -> set().
+-spec new() -> set(none()).
new() ->
Empty = mk_seg(?seg_size),
#set{empty = Empty, segs = {Empty}}.
--spec new([{version, 1..2}]) -> set().
+-spec new([{version, 1..2}]) -> set(none()).
new([{version, 2}]) ->
#{};
new(Opts) ->
@@ -358,31 +358,51 @@ is_disjoint_1(Set, Iter) ->
Set2 :: set(Element),
Set3 :: set(Element).
-subtract(#{}=S1, #{}=S2) ->
- Next = maps:next(maps:iterator(S1)),
- subtract_heuristic(Next, [], [], floor(map_size(S1) * 0.75), S1, S2);
-subtract(S1, S2) ->
- filter(fun (E) -> not is_element(E, S2) end, S1).
+subtract(#{}=LHS, #{}=RHS) ->
+ LSize = map_size(LHS),
+ RSize = map_size(RHS),
+
+ case RSize =< (LSize div 4) of
+ true ->
+ %% If we're guaranteed to keep more than 75% of the keys, it's
+ %% always cheaper to delete them one-by-one from the start.
+ Next = maps:next(maps:iterator(RHS)),
+ subtract_decided(Next, LHS, RHS);
+ false ->
+ %% We might delete more than 25% of the keys. Dynamically
+ %% transition to deleting elements one-by-one if we can determine
+ %% that we'll keep more than 75%.
+ KeepThreshold = (LSize * 3) div 4,
+ Next = maps:next(maps:iterator(LHS)),
+ subtract_heuristic(Next, [], [], KeepThreshold, LHS, RHS)
+ end;
+subtract(LHS, RHS) ->
+ filter(fun (E) -> not is_element(E, RHS) end, LHS).
-%% If we are keeping more than 75% of the keys, then it is
-%% cheaper to delete them. Stop accumulating and start deleting.
subtract_heuristic(Next, _Keep, Delete, 0, Acc, Reference) ->
- subtract_decided(Next, remove_keys(Delete, Acc), Reference);
-subtract_heuristic({Key, _Value, Iterator}, Keep, Delete, KeepCount, Acc, Reference) ->
+ %% We've kept more than 75% of the keys, transition to removing them
+ %% one-by-one.
+ subtract_decided(Next, remove_keys(Delete, Acc), Reference);
+subtract_heuristic({Key, _Value, Iterator}, Keep, Delete,
+ KeepCount, Acc, Reference) ->
Next = maps:next(Iterator),
case Reference of
- #{Key := _} ->
- subtract_heuristic(Next, Keep, [Key | Delete], KeepCount, Acc, Reference);
+ #{ Key := _ } ->
+ subtract_heuristic(Next, Keep, [Key | Delete],
+ KeepCount, Acc, Reference);
_ ->
- subtract_heuristic(Next, [Key | Keep], Delete, KeepCount - 1, Acc, Reference)
+ subtract_heuristic(Next, [Key | Keep], Delete,
+ KeepCount - 1, Acc, Reference)
end;
subtract_heuristic(none, Keep, _Delete, _Count, _Acc, _Reference) ->
maps:from_keys(Keep, ?VALUE).
subtract_decided({Key, _Value, Iterator}, Acc, Reference) ->
case Reference of
- #{Key := _} ->
- subtract_decided(maps:next(Iterator), maps:remove(Key, Acc), Reference);
+ #{ Key := _ } ->
+ subtract_decided(maps:next(Iterator),
+ maps:remove(Key, Acc),
+ Reference);
_ ->
subtract_decided(maps:next(Iterator), Acc, Reference)
end;
@@ -446,23 +466,12 @@ fold_1(Fun, Acc, Iter) ->
Set1 :: set(Element),
Set2 :: set(Element).
filter(F, #{}=D) when is_function(F, 1)->
- maps:from_keys(filter_1(F, maps:iterator(D)), ?VALUE);
+ %% For this purpose, it is more efficient to use
+ %% maps:from_keys than a map comprehension.
+ maps:from_keys([K || K := _ <- D, F(K)], ?VALUE);
filter(F, #set{}=D) when is_function(F, 1)->
filter_set(F, D).
-filter_1(Fun, Iter) ->
- case maps:next(Iter) of
- {K, _, NextIter} ->
- case Fun(K) of
- true ->
- [K | filter_1(Fun, NextIter)];
- false ->
- filter_1(Fun, NextIter)
- end;
- none ->
- []
- end.
-
%% get_slot(Hashdb, Key) -> Slot.
%% Get the slot. First hash on the new range, if we hit a bucket
%% which has not been split use the unsplit buddy bucket.