Add two larger couch_key_tree:find_missing/2 tests

These were used to generate the random rev trees used for bechmarking. But they also assert useful properties for the function so let's add them to test_util. Since we added a shuffle/1 function to test_util, DRY some extra shuffle/1 functions as we seem to have a bunch of them sprinkled here and there. Also, since the rev tree generation code was changed, need to adjust the `excessive_memory_when_stemming` test as well. To do it, reverted the commit indicated in the test and then adjusted parameters until the test failed. Then reset the revert and it failed.
author: Nick Vatamaniuc <vatamane@gmail.com> 2022-06-09 18:52:56 -0400
committer: Nick Vatamaniuc <nickva@users.noreply.github.com> 2022-06-10 10:16:54 -0400
commit: 05cc7c9ab4fa90df550f37b41858fd8235230003 (patch)
tree: c41f2db159ac4d88b6262e413d8fe43f1b756a78
parent: d2b233ec0e04869394b03d5344aac99cfc28e1ab (diff)
download: couchdb-05cc7c9ab4fa90df550f37b41858fd8235230003.tar.gz
4 files changed, 89 insertions, 34 deletions
diff --git a/src/couch/src/test_util.erl b/src/couch/src/test_util.erl
index 9f6d758c5..dfd155df8 100644
--- a/src/couch/src/test_util.erl
+++ b/src/couch/src/test_util.erl
@@ -19,7 +19,7 @@
 
 -export([init_code_path/0]).
 -export([source_file/1, build_file/1]).
-%% -export([run/2]).
+-export([revtree_generate/4, revtree_get_revs/1, random_rev/0]).
 
 -export([start_couch/0, start_couch/1, start_couch/2, stop_couch/0, stop_couch/1]).
 -export([start_config/1, stop_config/1]).
@@ -40,6 +40,8 @@
 
 -export([fake_db/1]).
 
+-export([shuffle/1]).
+
 -record(test_context, {mocked = [], started = [], module}).
 
 -define(DEFAULT_APPS, [inets, ibrowse, ssl, config, couch_epi, couch_event, couch]).
@@ -427,3 +429,62 @@ sort_apps(Apps) ->
 
 weight_app(couch_log) -> {0.0, couch_log};
 weight_app(Else) -> {1.0, Else}.
+
+% Generate random rev trees
+%
+% Args:
+%   Depth : Max depth. This will be halfed every time we branch.
+%
+%   BranchChance : 0.0 to 1.0 chance of branching at each level.
+%
+%   WideBranches: 1/4 of the time when branching happens it will create
+%                 wide branches, this specifies the width of those branches.
+%
+% Example usage: revtree_generate(25, 0.25, 10, os:timestamp())
+
+revtree_generate(Depth, BranchChance, WideBranches, Seed) ->
+    rand:seed(exrop, Seed),
+    Rev = random_rev(),
+    {Seed, [{1, revnode(Rev, Depth, BranchChance, WideBranches)}]}.
+
+% Get all the revisions in the tree as a sorted [{Pos, Rev} ...] list
+%
+revtree_get_revs([{Pos, {_, _, _} = Node}]) when is_integer(Pos) ->
+    lists:sort(maps:keys(revs1(Pos, Node))).
+
+revnode(Rev, Depth, _, _) when Depth =< 0 ->
+    {Rev, x, []};
+revnode(Rev, Depth, BranchChance, WideBranches) ->
+    Choice = rand:uniform(),
+    {Revs, Depth1} =
+        if
+            Choice < BranchChance / 4 ->
+                {childrev(WideBranches), trunc(Depth / 2)};
+            Choice < BranchChance ->
+                {childrev(2), trunc(Depth / 2)};
+            true ->
+                {childrev(1), Depth - 1}
+        end,
+    {Rev, x, [revnode(R, Depth1, BranchChance, WideBranches) || R <- Revs]}.
+
+childrev(N) ->
+    lists:sort([random_rev() || _ <- lists:seq(1, N)]).
+
+revs1(Pos, {Rev, _Val, []}) ->
+    #{{Pos, Rev} => true};
+revs1(Pos, {Rev, _Val, Nodes}) ->
+    lists:foldl(
+        fun(N, Acc) ->
+            maps:merge(Acc, revs1(Pos + 1, N))
+        end,
+        #{{Pos, Rev} => true},
+        Nodes
+    ).
+
+random_rev() ->
+    ?l2b(couch_util:to_hex(crypto:strong_rand_bytes(16))).
+
+shuffle(List) ->
+    Paired = [{couch_rand:uniform(), I} || I <- List],
+    Sorted = lists:sort(Paired),
+    [I || {_, I} <- Sorted].
diff --git a/src/couch/test/eunit/couch_key_tree_tests.erl b/src/couch/test/eunit/couch_key_tree_tests.erl
index d191541d4..4898c21da 100644
--- a/src/couch/test/eunit/couch_key_tree_tests.erl
+++ b/src/couch/test/eunit/couch_key_tree_tests.erl
@@ -44,7 +44,9 @@ key_tree_missing_leaves_test_() ->
         "Missing tree leaves",
         [
             should_not_find_missing_leaves(),
-            should_find_missing_leaves()
+            should_find_missing_leaves(),
+            should_not_find_missing_leaves_large_test(),
+            should_find_missing_leaves_large_test()
         ]
     }.
 
@@ -342,6 +344,27 @@ should_find_missing_leaves() ->
         )
     ].
 
+should_not_find_missing_leaves_large_test() ->
+    ?_test(begin
+        % This is to preserve determinism
+        Seed = {1647, 841737, 351137},
+        {_, Tree} = test_util:revtree_generate(1000, 0.2, 5, Seed),
+        Revs1 = test_util:revtree_get_revs(Tree),
+        Revs2 = test_util:shuffle(Revs1),
+        ?assertEqual([], couch_key_tree:find_missing(Tree, Revs2))
+    end).
+
+should_find_missing_leaves_large_test() ->
+    ?_test(begin
+        % This is to preserve determinism
+        Seed = {1647, 841737, 351137},
+        {_, Tree} = test_util:revtree_generate(1000, 0.2, 5, Seed),
+        Seq = lists:seq(1, 1000),
+        Revs1 = [{couch_rand:uniform(1000), test_util:random_rev()} || _ <- Seq],
+        Revs2 = couch_key_tree:find_missing(Tree, Revs1),
+        ?assertEqual(lists:sort(Revs1), lists:sort(Revs2))
+    end).
+
 should_have_no_effect_on_removing_no_leaves() ->
     TwoChildSibs = [{0, {"1", "foo", [{"1a", "bar", []}, {"1b", "bar", []}]}}],
     ?_assertEqual(
@@ -534,7 +557,7 @@ should_not_use_excessive_memory_when_stemming() ->
     ?_test(begin
         % This is to preserve determinism
         Seed = {1647, 841737, 351137},
-        Tree = generate_rev_tree(1000, 0.006, Seed),
+        {_, Tree} = test_util:revtree_generate(1000, 0.06, 15, Seed),
         % Without the optimization #91de482fd66f4773b3b8583039c6bcaf1c5727ec
         % stemming would consume about 18_000_000 words. With it, it consumes
         % 6_000_000. So, use 13_000_000 as a threshold.
@@ -554,23 +577,3 @@ should_not_use_excessive_memory_when_stemming() ->
             end,
         ?assertEqual(normal, Exit)
     end).
-
-generate_rev_tree(Depth, BranchChance, Seed) ->
-    rand:seed(exrop, Seed),
-    [{1, revnode(Depth, BranchChance)}].
-
-revnode(0, _) ->
-    {rev(), x, []};
-revnode(Depth, BranchChance) ->
-    case rand:uniform() < BranchChance of
-        true ->
-            {rev(), x, [
-                revnode(Depth - 1, BranchChance),
-                revnode(Depth - 1, BranchChance)
-            ]};
-        false ->
-            {rev(), x, [revnode(Depth - 1, BranchChance)]}
-    end.
-
-rev() ->
-    crypto:strong_rand_bytes(16).
diff --git a/src/couch_pse_tests/src/cpse_test_fold_changes.erl b/src/couch_pse_tests/src/cpse_test_fold_changes.erl
index 91f7c63e9..86536b169 100644
--- a/src/couch_pse_tests/src/cpse_test_fold_changes.erl
+++ b/src/couch_pse_tests/src/cpse_test_fold_changes.erl
@@ -123,7 +123,7 @@ cpse_update_second_of_two(Db1) ->
     ?assertEqual([{<<"a">>, 1}, {<<"b">>, 3}], lists:reverse(Changes)).
 
 cpse_check_mutation_ordering(Db1) ->
-    Actions = shuffle(
+    Actions = test_util:shuffle(
         lists:map(
             fun(Seq) ->
                 {create, {docid(Seq), {[]}}}
@@ -169,11 +169,6 @@ do_mutation_ordering(Db, Seq, [{DocId, _OldSeq} | Rest], DocSeqAcc) ->
     ?assertEqual(Expected, lists:reverse(RevOrder)),
     do_mutation_ordering(NewDb, Seq + 1, Rest, NewAcc).
 
-shuffle(List) ->
-    Paired = [{couch_rand:uniform(), I} || I <- List],
-    Sorted = lists:sort(Paired),
-    [I || {_, I} <- Sorted].
-
 fold_fun(#full_doc_info{id = Id, update_seq = Seq}, Acc) ->
     {ok, [{Id, Seq} | Acc]}.
 
diff --git a/src/mem3/test/eunit/mem3_ring_prop_tests.erl b/src/mem3/test/eunit/mem3_ring_prop_tests.erl
index 3e2310b21..612499c4d 100644
--- a/src/mem3/test/eunit/mem3_ring_prop_tests.erl
+++ b/src/mem3/test/eunit/mem3_ring_prop_tests.erl
@@ -92,7 +92,7 @@ g_connected_intervals(Begin, End, Split) when Begin =< End ->
                     Ns = lists:seq(1, N - 1),
                     Bs = lists:usort([rand_range(Begin, End) || _ <- Ns]),
                     Es = [B - 1 || B <- Bs],
-                    shuffle(lists:zip([Begin] ++ Bs, Es ++ [End]))
+                    test_util:shuffle(lists:zip([Begin] ++ Bs, Es ++ [End]))
             end
         end
     ).
@@ -149,10 +149,6 @@ rand_range(B, B) ->
 rand_range(B, E) ->
     B + rand:uniform(E - B).
 
-shuffle(L) ->
-    Tagged = [{rand:uniform(), X} || X <- L],
-    [X || {_, X} <- lists:sort(Tagged)].
-
 endpoints(Ranges) ->
     {Begins, Ends} = lists:unzip(Ranges),
     sets:from_list(Begins ++ Ends).
author	Nick Vatamaniuc <vatamane@gmail.com>	2022-06-09 18:52:56 -0400
committer	Nick Vatamaniuc <nickva@users.noreply.github.com>	2022-06-10 10:16:54 -0400
commit	05cc7c9ab4fa90df550f37b41858fd8235230003 (patch)
tree	c41f2db159ac4d88b6262e413d8fe43f1b756a78
parent	d2b233ec0e04869394b03d5344aac99cfc28e1ab (diff)
download	couchdb-05cc7c9ab4fa90df550f37b41858fd8235230003.tar.gz