summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2022-03-24 18:48:05 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2022-03-24 21:36:07 -0400
commit5ebc50685a98756cd1544c439df3ae5bce158670 (patch)
tree35528f2e42ccf965fc52dc0e519a54d2a9d066ed
parent2f56adb39df11499dfeea66d316ef7110f47458b (diff)
downloadcouchdb-5ebc50685a98756cd1544c439df3ae5bce158670.tar.gz
Add test to ensure rev tree stemming doesn't take too much memory
`couch_key_tree:stem/2`, as seen in https://github.com/apache/couchdb/pull/3963, has a potential to consume quite a bit of memory. Replacing sets with maps helped in that case however, since stemming has a non-tail recursive section, there is a chance in future versions of Erlang to experience the same behavior again. As a safeguard, add a memory limit test by stemming a larger conflicted rev tree while limiting the maximum process heap size. For that, use the nifty `max_heap_size` process flag, which ensures a process is killed if it starts using too much memory. To reduce the flakiness, use a deterministic tree shape by using a hard-coded seed value and leave a decent margin of error for the memory limit. Ref: https://github.com/apache/couchdb/pull/3963
-rw-r--r--src/couch/test/eunit/couch_key_tree_tests.erl48
1 files changed, 47 insertions, 1 deletions
diff --git a/src/couch/test/eunit/couch_key_tree_tests.erl b/src/couch/test/eunit/couch_key_tree_tests.erl
index f571139c9..d191541d4 100644
--- a/src/couch/test/eunit/couch_key_tree_tests.erl
+++ b/src/couch/test/eunit/couch_key_tree_tests.erl
@@ -96,7 +96,8 @@ key_tree_stemming_test_() ->
[
should_have_no_effect_for_stemming_more_levels_than_exists(),
should_return_one_deepest_node(),
- should_return_two_deepest_nodes()
+ should_return_two_deepest_nodes(),
+ should_not_use_excessive_memory_when_stemming()
]
}.
@@ -528,3 +529,48 @@ should_return_two_deepest_nodes() ->
merge_and_stem(RevTree, Tree) ->
{Merged, Result} = couch_key_tree:merge(RevTree, Tree),
{couch_key_tree:stem(Merged, ?DEPTH), Result}.
+
+should_not_use_excessive_memory_when_stemming() ->
+ ?_test(begin
+ % This is to preserve determinism
+ Seed = {1647, 841737, 351137},
+ Tree = generate_rev_tree(1000, 0.006, Seed),
+ % Without the optimization #91de482fd66f4773b3b8583039c6bcaf1c5727ec
+ % stemming would consume about 18_000_000 words. With it, it consumes
+ % 6_000_000. So, use 13_000_000 as a threshold.
+ Opts = [
+ monitor,
+ {max_heap_size, #{
+ size => 13000000,
+ error_logger => false,
+ kill => true
+ }}
+ ],
+ {_Pid, Ref} = spawn_opt(couch_key_tree, stem, [Tree, 1000], Opts),
+ % When it uses too much memory exit would be `killed`
+ Exit =
+ receive
+ {'DOWN', Ref, _, _, Res} -> Res
+ end,
+ ?assertEqual(normal, Exit)
+ end).
+
+generate_rev_tree(Depth, BranchChance, Seed) ->
+ rand:seed(exrop, Seed),
+ [{1, revnode(Depth, BranchChance)}].
+
+revnode(0, _) ->
+ {rev(), x, []};
+revnode(Depth, BranchChance) ->
+ case rand:uniform() < BranchChance of
+ true ->
+ {rev(), x, [
+ revnode(Depth - 1, BranchChance),
+ revnode(Depth - 1, BranchChance)
+ ]};
+ false ->
+ {rev(), x, [revnode(Depth - 1, BranchChance)]}
+ end.
+
+rev() ->
+ crypto:strong_rand_bytes(16).