diff options
author | Nick Vatamaniuc <vatamane@gmail.com> | 2022-03-24 18:48:05 -0400 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2022-03-24 21:36:07 -0400 |
commit | 5ebc50685a98756cd1544c439df3ae5bce158670 (patch) | |
tree | 35528f2e42ccf965fc52dc0e519a54d2a9d066ed | |
parent | 2f56adb39df11499dfeea66d316ef7110f47458b (diff) | |
download | couchdb-5ebc50685a98756cd1544c439df3ae5bce158670.tar.gz |
Add test to ensure rev tree stemming doesn't take too much memory
`couch_key_tree:stem/2`, as seen in
https://github.com/apache/couchdb/pull/3963, has a potential to consume quite a
bit of memory. Replacing sets with maps helped in that case however, since
stemming has a non-tail recursive section, there is a chance in future versions
of Erlang to experience the same behavior again.
As a safeguard, add a memory limit test by stemming a larger conflicted rev
tree while limiting the maximum process heap size. For that, use the nifty
`max_heap_size` process flag, which ensures a process is killed if it starts
using too much memory.
To reduce the flakiness, use a deterministic tree shape by using a hard-coded
seed value and leave a decent margin of error for the memory limit.
Ref: https://github.com/apache/couchdb/pull/3963
-rw-r--r-- | src/couch/test/eunit/couch_key_tree_tests.erl | 48 |
1 files changed, 47 insertions, 1 deletions
diff --git a/src/couch/test/eunit/couch_key_tree_tests.erl b/src/couch/test/eunit/couch_key_tree_tests.erl index f571139c9..d191541d4 100644 --- a/src/couch/test/eunit/couch_key_tree_tests.erl +++ b/src/couch/test/eunit/couch_key_tree_tests.erl @@ -96,7 +96,8 @@ key_tree_stemming_test_() -> [ should_have_no_effect_for_stemming_more_levels_than_exists(), should_return_one_deepest_node(), - should_return_two_deepest_nodes() + should_return_two_deepest_nodes(), + should_not_use_excessive_memory_when_stemming() ] }. @@ -528,3 +529,48 @@ should_return_two_deepest_nodes() -> merge_and_stem(RevTree, Tree) -> {Merged, Result} = couch_key_tree:merge(RevTree, Tree), {couch_key_tree:stem(Merged, ?DEPTH), Result}. + +should_not_use_excessive_memory_when_stemming() -> + ?_test(begin + % This is to preserve determinism + Seed = {1647, 841737, 351137}, + Tree = generate_rev_tree(1000, 0.006, Seed), + % Without the optimization #91de482fd66f4773b3b8583039c6bcaf1c5727ec + % stemming would consume about 18_000_000 words. With it, it consumes + % 6_000_000. So, use 13_000_000 as a threshold. + Opts = [ + monitor, + {max_heap_size, #{ + size => 13000000, + error_logger => false, + kill => true + }} + ], + {_Pid, Ref} = spawn_opt(couch_key_tree, stem, [Tree, 1000], Opts), + % When it uses too much memory exit would be `killed` + Exit = + receive + {'DOWN', Ref, _, _, Res} -> Res + end, + ?assertEqual(normal, Exit) + end). + +generate_rev_tree(Depth, BranchChance, Seed) -> + rand:seed(exrop, Seed), + [{1, revnode(Depth, BranchChance)}]. + +revnode(0, _) -> + {rev(), x, []}; +revnode(Depth, BranchChance) -> + case rand:uniform() < BranchChance of + true -> + {rev(), x, [ + revnode(Depth - 1, BranchChance), + revnode(Depth - 1, BranchChance) + ]}; + false -> + {rev(), x, [revnode(Depth - 1, BranchChance)]} + end. + +rev() -> + crypto:strong_rand_bytes(16). |