1 files changed, 531 insertions, 0 deletions
diff --git a/src/couch/test/couch_key_tree_prop_tests.erl b/src/couch/test/couch_key_tree_prop_tests.erl
new file mode 100644
index 000000000..604a8285a
--- /dev/null
+++ b/src/couch/test/couch_key_tree_prop_tests.erl
@@ -0,0 +1,531 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_key_tree_prop_tests).
+
+-include_lib("triq/include/triq.hrl").
+-triq(eunit).
+-include_lib("eunit/include/eunit.hrl").
+
+-define(SIZE_REDUCTION, 3).  % How much to reduce size with tree depth.
+-define(MAX_BRANCHES, 4).  % Maximum number of branches.
+-define(RAND_SIZE, 1 bsl 64).
+
+
+%
+% Properties
+%
+
+
+% Merge random paths from a revtree into itself. Check that no revisions have
+% been lost in the process and that result is one of the 3 expected values.
+%
+prop_revtree_merge_with_subset_of_own_nodes() ->
+    ?FORALL(Revs, g_revs(),
+        ?FORALL({RevTree, Branch}, {g_revtree(Revs), g_revtree(Revs, 1)},
+            ?IMPLIES(length(Branch) > 0 andalso repeating_revs(levels(RevTree ++ Branch)) == [],
+                begin
+                    {Merged, Result} = couch_key_tree:merge(RevTree, hd(Branch)),
+                    lists:member(Result, [new_leaf, new_branch, internal_node])
+                        andalso same_keys(RevTree ++ Branch, Merged)
+                        andalso valid_revtree(Merged)
+                end
+            )
+        )
+    ).
+
+
+% Merge random trees into revtree.
+%
+prop_revtree_merge_random_nodes() ->
+    ?FORALL({RevTree, Branch}, {g_revtree(), g_revtree([], 1)},
+        ?IMPLIES(length(Branch) > 0,
+            begin
+                {Merged, _} = couch_key_tree:merge(RevTree, hd(Branch)),
+                valid_revtree(Merged)
+            end
+        )
+    ).
+
+
+
+% Merge mix or random and existing revtree paths into revtree
+%
+prop_revtree_merge_some_existing_some_new() ->
+    ?FORALL(RevTree, g_revtree(),
+        ?FORALL(Branch,
+            begin
+                KeyList = keylist(RevTree),
+                Half = lists:sublist(KeyList, length(KeyList) div 2),
+                g_revtree(Half, 1)
+            end,
+            ?IMPLIES(length(Branch) > 0 andalso repeating_revs(levels(RevTree ++ Branch)) == [],
+                 begin
+                     {Merged, _} = couch_key_tree:merge(RevTree, hd(Branch)),
+                     valid_revtree(Merged)
+                 end
+            )
+        )
+    ).
+
+
+
+% Stem deeper than the current max level. Expect no changes to the revtree
+%
+prop_no_change_stemming_deeper_than_current_depth() ->
+    ?FORALL(RevTree, g_revtree(),
+        begin
+            StemDepth = depth(RevTree) + 1,
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            StemmedKeys = lists:usort(keylist(Stemmed)),
+            InputKeys = lists:usort(keylist(RevTree)),
+            StemmedKeys == InputKeys
+        end
+    ).
+
+
+% Stem at a random small depth, make sure that resulting tree has
+% unique revisions and the same number or less revisions than input
+%
+prop_stemming_results_in_same_or_less_total_revs() ->
+    ?FORALL({RevTree, StemDepth}, {g_revtree(), choose(1, 20)},
+        begin
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            OldRealDepth = real_depth(RevTree),
+            StemmedKeys = keylist(Stemmed),
+            UniqueStemmedKeys = lists:usort(StemmedKeys),
+            UniqueInputKeys = lists:usort(keylist(RevTree)),
+            NewRealDepth = real_depth(Stemmed),
+            length(StemmedKeys) == length(UniqueStemmedKeys)
+                andalso length(UniqueStemmedKeys) =< length(UniqueInputKeys)
+                andalso OldRealDepth >= NewRealDepth
+        end
+    ).
+
+
+% Generate a longer path (revtree with no branches) then stem it.
+% Always expect it to shrink to stemmed depth.
+prop_stem_path_expect_size_to_get_smaller() ->
+    ?FORALL({RevTree, StemDepth},
+        {
+            ?SIZED(Size, resize(Size * 10, g_revtree([], 1))),
+            choose(1,5)
+        },
+        ?IMPLIES(real_depth(RevTree) > 5,
+            begin
+                Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+                StemmedKeys = lists:usort(keylist(Stemmed)),
+                InputKeys = lists:usort(keylist(RevTree)),
+                length(InputKeys) > length(StemmedKeys)
+                    andalso real_depth(Stemmed) == StemDepth
+            end
+        )
+    ).
+
+
+% After stemming all leaves are still present
+prop_after_stemming_all_leaves_are_present() ->
+    ?FORALL({RevTree, StemDepth},
+        {g_revtree(), choose(1,20)},
+        begin
+            OldRealDepth = real_depth(RevTree),
+            OldLeaves = leaves(RevTree),
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            NewRealDepth = real_depth(Stemmed),
+            NewLeaves = leaves(Stemmed),
+            valid_revtree(Stemmed)
+               andalso OldRealDepth >= NewRealDepth
+               andalso OldLeaves == NewLeaves
+
+        end
+    ).
+
+
+% After stemming paths to root didn't get longer
+prop_after_stemming_paths_are_shorter() ->
+    ?FORALL({StemDepth, RevTree}, {choose(2,10), g_revtree()},
+        begin
+            OldPaths = paths(RevTree),
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            NewPaths = paths(Stemmed),
+            GrowingPaths = orddict:fold(fun(Rev, Path, Acc) ->
+               OldPath = orddict:fetch(Rev, OldPaths),
+               case length(Path) > length(OldPath) of
+                  true ->
+                       [{Rev, Path, OldPath}| Acc];
+                   false ->
+                       Acc
+               end
+            end, [], NewPaths),
+            valid_revtree(Stemmed) andalso GrowingPaths == []
+        end
+    ).
+
+
+% Check leaf count
+prop_leaf_count() ->
+    ?FORALL(RevTree, g_revtree(),
+        length(leaves(RevTree)) == couch_key_tree:count_leafs(RevTree)
+    ).
+
+
+% Check get leafs
+prop_get_leafs() ->
+    ?FORALL(RevTree, g_revtree(),
+        begin
+            LeafsFull = couch_key_tree:get_all_leafs(RevTree),
+            lists:usort([Rev || {_V, {_D, [Rev | _]}} <- LeafsFull]) == leaves(RevTree)
+        end
+    ).
+
+
+%
+% Generators
+%
+
+% Generate a full rev tree. Most of the forms are just there to set up default
+% parameters, _revtree/3 does all heavy lifting.
+%
+
+g_revtree() ->
+    ?SIZED(Size, g_revtree(Size)).
+
+
+g_revtree(Size) when is_integer(Size) ->
+    g_revtree(Size, [], ?MAX_BRANCHES);
+g_revtree(Revs) when is_list(Revs) ->
+    ?SIZED(Size, g_revtree(Size, Revs, ?MAX_BRANCHES)).
+
+
+g_revtree(Size, Revs) when is_integer(Size), is_list(Revs) ->
+    g_revtree(Size, Revs, ?MAX_BRANCHES);
+g_revtree(Revs, MaxBranches) when is_list(Revs), is_integer(MaxBranches) ->
+    ?SIZED(Size, g_revtree(Size, Revs, MaxBranches)).
+
+
+g_revtree(0, _Revs, _MaxBranches) ->
+    [];
+g_revtree(Size, ERevs, MaxBranches) ->
+    ?LET({Depth, Revs}, {g_stem_depth(Size), g_revs(Size, ERevs)},
+         [{Depth, g_treenode(Size, Revs, MaxBranches)}]
+    ).
+
+
+% Generate a tree node and then recursively generate its children.
+%
+g_treenode(0, Revs, _) ->
+    {elements(Revs), x, []};
+g_treenode(Size, Revs, MaxBranches) ->
+    ?DELAY(?LET(N, int(0, MaxBranches),
+        begin
+            [Rev | ChildRevs] = Revs,
+            {Rev, x, g_nodes(Size div ?SIZE_REDUCTION, N, ChildRevs, MaxBranches)}
+        end
+    )).
+
+
+% Generate a list of child nodes. Depending on how many children there are
+% the pre-generarated revision list is split into that many sublists.
+%
+g_nodes(0, _N, _Revs, _MaxBranches) ->
+    [];
+g_nodes(_Size, 0, _Revs, _MaxBranches) ->
+    [];
+g_nodes(Size, ChildCount, Revs, MaxBranches) ->
+    ?LETSHRINK(
+        ChildNodes,
+        begin
+            ChildRevList = child_revs(ChildCount, Revs, Size, MaxBranches),
+            [g_treenode(Size, ChildRevs, MaxBranches) || ChildRevs <- ChildRevList]
+        end,
+        ordered_nodes(ChildNodes)
+    ).
+
+
+% Generate each subtree's stem depth
+%
+
+
+g_stem_depth(Size) ->
+    choose(0,  expected_height(Size, ?SIZE_REDUCTION) div 2).
+
+
+% Uses the shuffle/1 function to shuffle the input list. Unshuffled list is
+% used as the shrink value.
+%
+g_shuffle(L) when is_list(L) ->
+    triq_dom:domain(g_shuffle,
+        fun(Self, _Size) -> {Self, shuffle(L)} end,
+        fun(Self, _Val) -> {Self, L} end
+     ).
+
+
+% Wrapper to make a list shuffling generator that doesn't shrink
+%
+g_shuffle_noshrink(L) when is_list(L) ->
+    triq_dom:noshrink(g_shuffle(L)).
+
+
+% Generate shuffled sublists up to N items long from a list.
+%
+g_shuffled_sublists(L, N) ->
+    ?LET(Shuffled, g_shuffle_noshrink(L), lists:sublist(Shuffled, N)).
+
+
+% Generate revision lists.
+%
+g_revs() ->
+    ?SIZED(Size, g_revs(Size)).
+
+
+g_revs(Size) when is_integer(Size) ->
+    g_revs(Size, []).
+
+
+g_revs(Size, Existing) when is_integer(Size), is_list(Existing) ->
+    Expected = keys_needed(Size, ?SIZE_REDUCTION, ?MAX_BRANCHES),
+    Revs = revs(Expected, Existing),
+    case length(Revs) > Expected of
+        true -> % have extra, try various sublists
+            g_shuffled_sublists(Revs, Expected);
+        false ->
+            triq_dom:return(Revs)
+    end.
+
+
+%
+% Helper functions
+%
+
+
+valid_revtree(RevTree) ->
+    repeating_revs(levels(RevTree)) == [] andalso children_sorted(RevTree).
+
+
+same_keys(RevTree1, RevTree2) ->
+    Keys1 = lists:usort(keylist(RevTree1)),
+    Keys2 = lists:usort(keylist(RevTree2)),
+    Keys1 == Keys2.
+
+
+all(L) ->
+    lists:all(fun(E) -> E end, L).
+
+% Shufle a list of items. Tag each item with a random number then sort
+% the list and remove the tags.
+%
+shuffle(L) ->
+    Tagged = [{triq_rnd:uniform(), X} || X <- L],
+    [X || {_, X} <- lists:sort(Tagged)].
+
+
+% Generate list of relateively unique large random numbers
+rand_list(N) when N =< 0 ->
+    [];
+rand_list(N) ->
+    [triq_rnd:uniform(?RAND_SIZE) || _ <- lists:seq(1, N)].
+
+
+% Generate a list of revisions to be used as key in revision trees. Expected
+% must the number of maximum expected nodes in a revision tree. Existing is an
+% optional list revisions which must be included in the result. The output list
+% is sorted.
+revs(0, _Existing) ->
+    [];
+revs(Expected, Existing) when is_integer(Expected), is_list(Existing) ->
+    Need = Expected - length(Existing),
+    lists:usort(lists:append(Existing, rand_list(Need))).
+
+
+% Get the list of all the keys in a revision tree. The input can also be a
+% an individual tree (tagged with the depth to virtual root) or a node.
+% Yes, this is not tail recursive but the idea is to keep it simple.
+%
+keylist({_D, Node}) when is_tuple(Node) ->
+    keylist(Node);
+keylist({K, _V, Nodes}) ->
+    [K | keylist(Nodes)];
+keylist(Nodes) ->
+    lists:append([keylist(Node) || Node <- Nodes]).
+
+
+% Get the list of leaves from a revision tree.
+leaves([]) ->
+    [];
+leaves({_D, Node}) when is_tuple(Node) ->
+    leaves(Node);
+leaves({K, _V, []}) ->
+    [K];
+leaves({_K, _V, Nodes}) ->
+    leaves(Nodes);
+leaves(Nodes) ->
+    lists:usort(lists:append([leaves(N) || N <- Nodes])).
+
+
+% Get paths from leaf to root. Result is an orddict of [{LeafRev, [Rev]}]
+%
+paths([]) ->
+    orddict:new();
+paths(RevTree) when is_list(RevTree) ->
+    paths_merge_dicts([paths(T) || T <- RevTree]);
+paths({_Depth, Node}) when is_tuple(Node) ->
+    paths(Node);
+paths({K, _V, []}) ->
+    orddict:store(K, [], orddict:new());
+paths({K, _V, Nodes}) ->
+    CombinedDict = paths_merge_dicts([paths(N) || N <- Nodes]),
+    orddict:map(fun(_LeafKey, Path) -> Path ++ [K] end, CombinedDict).
+
+
+paths_merge_dicts(Dicts) ->
+    lists:foldl(fun(D, AccD) ->
+        orddict:merge(fun(K, V1, V2) ->
+            throw({found_duplicates, K, V1, V2})
+         end, D, AccD)
+    end, orddict:new(), Dicts).
+
+
+% Get lists of all the keys at each depth level. Result is an orddict that
+% looks like [{depth, [key]}]. The depth used here is the "virtual" depth as
+% indicated by the stemmed depth tag that goes with every top level subtree.
+%
+levels([]) ->
+    orddict:new();
+levels(RevTree) when is_list(RevTree) ->
+    lists:foldl(fun(T, Dict) -> levels(T, Dict) end, orddict:new(), RevTree).
+
+
+levels({Depth, Node}, Dict) when is_tuple(Node) ->
+    levels(Node, Depth, Dict).
+
+
+levels({K, _V, Nodes}, Depth, Dict) ->
+    Dict1 = case orddict:is_key(Depth, Dict) of
+        true -> orddict:append(Depth, K, Dict);
+        false -> orddict:store(Depth, [K], Dict)
+    end,
+    levels(Nodes, Depth + 1, Dict1);
+levels(Nodes, Depth, Dict) ->
+    lists:foldl(fun(Node, AccDict) ->
+        levels(Node, Depth, AccDict)
+    end, Dict, Nodes).
+
+
+% Using the output of leaves/1 as input return any repeating revisions if
+% there are any at a particular level. Levels which have not revisions are
+% not returned.
+%
+repeating_revs(Dict) ->
+    orddict:filter(fun(_Depth, Revs) ->
+        length(lists:usort(Revs)) =/= length(Revs)
+    end, Dict).
+
+
+% Check that children of all nodes are sorted
+children_sorted([]) ->
+    true;
+children_sorted(Nodes) when is_list(Nodes) ->
+    all([children_sorted(N) || N <- Nodes]);
+children_sorted({_D, Node}) when is_tuple(Node) ->
+    children_sorted(Node);
+children_sorted({_K, _V, Nodes}) ->
+    children_sorted(Nodes).
+
+
+% Get the maximum depth of a revtree. The depth is "virtual" as it takes into
+% account the distance to the now stemmed root node as indicated by the top
+% level subtrees.
+%
+depth([]) ->
+    0;
+depth(RevTree) when is_list(RevTree) ->
+    lists:max([depth(T) || T <- RevTree]);
+depth({Depth, Node}) when is_tuple(Node) ->
+    depth(Node, Depth - 1).
+
+
+depth({_K, _V, Nodes}, Depth) ->
+    depth(Nodes, Depth + 1);
+depth([], Depth) ->
+    Depth;
+depth(Nodes, Depth) ->
+    lists:max([depth(Node, Depth) || Node <- Nodes]).
+
+
+% Get the "real" tree depth, not the virtual one. As revtrees gets stemmed they
+% will keep their virtual depth but the actual number of nodes in the tree
+% could be reduced.
+%
+real_depth([]) ->
+    0;
+real_depth(RevTree) when is_list(RevTree) ->
+    lists:max([real_depth(T) || T <- RevTree]);
+real_depth({_Depth, Node}) when is_tuple(Node) ->
+    depth(Node, 0).  % Note from here on use the depth/3 function
+
+
+% Return an ordered list of revtree nodes. When sorting only immediate keys
+% (revisions) are looked at and comparison doesn't descent into the treee.
+%
+ordered_nodes(Nodes) ->
+    lists:sort(fun({K1, _, _}, {K2, _, _}) -> K1 =< K2 end, Nodes).
+
+
+% Calculate a maximum number of rev tree nodes needed for a tree of a given
+% height and branchiness. Height is derived from Size and LevelReductionFactor,
+% that is how big the sample should be and quickly the size parameter would
+% shrink on each level.
+%
+keys_needed(0, _, _) ->
+    0;
+keys_needed(Size, LevelReductionFactor, 1) ->
+    expected_height(Size, LevelReductionFactor);
+keys_needed(Size, LevelReductionFactor, Branches) ->
+    Height =  expected_height(Size, LevelReductionFactor),
+    trunc(math:pow(Branches, Height + 1)) + 1.
+
+
+% Calculate expected tree height for a given sample size and branchiness.
+% At each step the size is divided by the reduction factor.
+expected_height(Size, LevelReductionFactor) ->
+    trunc(log(LevelReductionFactor, Size)) + 1.
+
+
+log(B, X) ->
+    math:log(X) / math:log(B).
+
+
+% Distribute items in a list into roughly equal chunks of a given size.
+%
+distribute(_ChunkSize, []) ->
+    [];
+distribute(ChunkSize, L) when ChunkSize >= length(L) ->
+    [L];
+distribute(ChunkSize, L) ->
+    {L1, L2} = lists:split(ChunkSize, L),
+    [L1 | distribute(ChunkSize, L2)].
+
+
+% Split a single (parent) revision list into chunks (sub-lists), one for each
+% child. Also, for safety, double check that at this point in the process the
+% list of revisions is sufficiently large. If it isn't something went wrong and
+% a specific exception is thrown ({not_enough_revisions, Got, Needed}).
+%
+child_revs(ChildCount, Revs, Size, MaxBranches) ->
+    NeedKeys = keys_needed(Size, ?SIZE_REDUCTION, MaxBranches),
+    case length(Revs) >= NeedKeys of
+        true ->
+            ChunkSize = trunc(length(Revs) / ChildCount) + 1,
+            distribute(ChunkSize, Revs);
+        false ->
+            throw({not_enough_revisions, length(Revs), NeedKeys})
+    end.