diff options
Diffstat (limited to 'lib/stdlib/src/sets.erl')
-rw-r--r-- | lib/stdlib/src/sets.erl | 71 |
1 files changed, 40 insertions, 31 deletions
diff --git a/lib/stdlib/src/sets.erl b/lib/stdlib/src/sets.erl index bdc0ed40f3..dccc6dcf3a 100644 --- a/lib/stdlib/src/sets.erl +++ b/lib/stdlib/src/sets.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2000-2021. All Rights Reserved. +%% Copyright Ericsson AB 2000-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -89,12 +89,12 @@ %%------------------------------------------------------------------------------ %% new() -> Set --spec new() -> set(). +-spec new() -> set(none()). new() -> Empty = mk_seg(?seg_size), #set{empty = Empty, segs = {Empty}}. --spec new([{version, 1..2}]) -> set(). +-spec new([{version, 1..2}]) -> set(none()). new([{version, 2}]) -> #{}; new(Opts) -> @@ -358,31 +358,51 @@ is_disjoint_1(Set, Iter) -> Set2 :: set(Element), Set3 :: set(Element). -subtract(#{}=S1, #{}=S2) -> - Next = maps:next(maps:iterator(S1)), - subtract_heuristic(Next, [], [], floor(map_size(S1) * 0.75), S1, S2); -subtract(S1, S2) -> - filter(fun (E) -> not is_element(E, S2) end, S1). +subtract(#{}=LHS, #{}=RHS) -> + LSize = map_size(LHS), + RSize = map_size(RHS), + + case RSize =< (LSize div 4) of + true -> + %% If we're guaranteed to keep more than 75% of the keys, it's + %% always cheaper to delete them one-by-one from the start. + Next = maps:next(maps:iterator(RHS)), + subtract_decided(Next, LHS, RHS); + false -> + %% We might delete more than 25% of the keys. Dynamically + %% transition to deleting elements one-by-one if we can determine + %% that we'll keep more than 75%. + KeepThreshold = (LSize * 3) div 4, + Next = maps:next(maps:iterator(LHS)), + subtract_heuristic(Next, [], [], KeepThreshold, LHS, RHS) + end; +subtract(LHS, RHS) -> + filter(fun (E) -> not is_element(E, RHS) end, LHS). -%% If we are keeping more than 75% of the keys, then it is -%% cheaper to delete them. Stop accumulating and start deleting. subtract_heuristic(Next, _Keep, Delete, 0, Acc, Reference) -> - subtract_decided(Next, remove_keys(Delete, Acc), Reference); -subtract_heuristic({Key, _Value, Iterator}, Keep, Delete, KeepCount, Acc, Reference) -> + %% We've kept more than 75% of the keys, transition to removing them + %% one-by-one. + subtract_decided(Next, remove_keys(Delete, Acc), Reference); +subtract_heuristic({Key, _Value, Iterator}, Keep, Delete, + KeepCount, Acc, Reference) -> Next = maps:next(Iterator), case Reference of - #{Key := _} -> - subtract_heuristic(Next, Keep, [Key | Delete], KeepCount, Acc, Reference); + #{ Key := _ } -> + subtract_heuristic(Next, Keep, [Key | Delete], + KeepCount, Acc, Reference); _ -> - subtract_heuristic(Next, [Key | Keep], Delete, KeepCount - 1, Acc, Reference) + subtract_heuristic(Next, [Key | Keep], Delete, + KeepCount - 1, Acc, Reference) end; subtract_heuristic(none, Keep, _Delete, _Count, _Acc, _Reference) -> maps:from_keys(Keep, ?VALUE). subtract_decided({Key, _Value, Iterator}, Acc, Reference) -> case Reference of - #{Key := _} -> - subtract_decided(maps:next(Iterator), maps:remove(Key, Acc), Reference); + #{ Key := _ } -> + subtract_decided(maps:next(Iterator), + maps:remove(Key, Acc), + Reference); _ -> subtract_decided(maps:next(Iterator), Acc, Reference) end; @@ -446,23 +466,12 @@ fold_1(Fun, Acc, Iter) -> Set1 :: set(Element), Set2 :: set(Element). filter(F, #{}=D) when is_function(F, 1)-> - maps:from_keys(filter_1(F, maps:iterator(D)), ?VALUE); + %% For this purpose, it is more efficient to use + %% maps:from_keys than a map comprehension. + maps:from_keys([K || K := _ <- D, F(K)], ?VALUE); filter(F, #set{}=D) when is_function(F, 1)-> filter_set(F, D). -filter_1(Fun, Iter) -> - case maps:next(Iter) of - {K, _, NextIter} -> - case Fun(K) of - true -> - [K | filter_1(Fun, NextIter)]; - false -> - filter_1(Fun, NextIter) - end; - none -> - [] - end. - %% get_slot(Hashdb, Key) -> Slot. %% Get the slot. First hash on the new range, if we hit a bucket %% which has not been split use the unsplit buddy bucket. |