summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2022-04-02 01:31:19 -0400
committerNick Vatamaniuc <vatamane@apache.org>2022-04-05 23:31:38 -0400
commit2e4be0460b4f00361dd0c96534916f3f8ef2e794 (patch)
tree9f56dbd80ef7f26a8459cb5fc692dc822df114ac
parent39a7438518e703cd1ee82b850a6ceda3f448d295 (diff)
downloadcouchdb-2e4be0460b4f00361dd0c96534916f3f8ef2e794.tar.gz
Optimize key tree stemming by using maps instead of sets
sets to due to a compiler bug in 22 consumes too much memory and is slower on Erlang 22+ Reproducer: https://gist.github.com/nickva/ddc499e6e05678faf20d344c6e11daaa With sets: ``` couch_key_tree:gen_and_stem(). {8,6848.812683105469} ``` With maps: ``` couch_key_tree:gen_and_stem(). {0,544.000732421875} ``` This is a cherry-pick of https://github.com/apache/couchdb/commit/91de482fd66f4773b3b8583039c6bcaf1c5727ec
-rw-r--r--src/couch/src/couch_key_tree.erl10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/couch/src/couch_key_tree.erl b/src/couch/src/couch_key_tree.erl
index 94150418e..7811d9532 100644
--- a/src/couch/src/couch_key_tree.erl
+++ b/src/couch/src/couch_key_tree.erl
@@ -471,7 +471,7 @@ stem(Trees, Limit) ->
{_, Branches} = lists:foldl(fun(Tree, {Seen, TreeAcc}) ->
{NewSeen, NewBranches} = stem_tree(Tree, Limit, Seen),
{NewSeen, NewBranches ++ TreeAcc}
- end, {sets:new(), []}, Trees),
+ end, {#{}, []}, Trees),
lists:sort(Branches)
catch throw:dupe_keys ->
repair_tree(Trees, Limit)
@@ -522,11 +522,11 @@ stem_tree(Depth, {Key, Val, Children}, Limit, Seen0) ->
check_key(Key, Seen) ->
- case sets:is_element(Key, Seen) of
- true ->
+ case Seen of
+ #{Key := true} ->
throw(dupe_keys);
- false ->
- sets:add_element(Key, Seen)
+ _ ->
+ Seen#{Key => true}
end.