Key tree property testsproperty-tests-for-couch-key-tree

Key tree module is a candidate to use property tests on as it mostly deals with manipulating a single data structure and functions are referentially transparent, that is, they aren't many side-effects like IO for example. The test consists of two main parts - generators and properties. Generators generate random input, for example revision trees, and properties check that certain properties hold, for example that after stemming all the leaves are still present in the revtree. To run the test: make eunit apps=couch suites=couch_key_tree_prop_tests
author: Nick Vatamaniuc <vatamane@apache.org> 2018-02-02 12:08:08 -0500
committer: Nick Vatamaniuc <vatamane@apache.org> 2018-04-03 14:53:09 -0400
commit: 716ca59e4b148628e5d69e1c48e467e0f4cb87ac (patch)
tree: 5a8732ec1e840661de9ff5b38c8740a760a3d372
parent: 948a1311cd6c4a4ad68254f6358afe207ab8a767 (diff)
download: couchdb-property-tests-for-couch-key-tree.tar.gz
5 files changed, 537 insertions, 4 deletions
diff --git a/.gitignore b/.gitignore
index a1a3040c9..faa07f983 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,7 @@ src/mochiweb/
 src/oauth/
 src/rebar/
 src/snappy/
+src/triq/
 tmp/
 
 src/couch/*.o
diff --git a/Makefile b/Makefile
index 1315dda8b..d56d4e897 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@ DESTDIR=
 
 # Rebar options
 apps=
-skip_deps=folsom,meck,mochiweb,proper,snappy
+skip_deps=folsom,meck,mochiweb,triq,snappy
 suites=
 tests=
 
diff --git a/Makefile.win b/Makefile.win
index 91f91a548..5a2a73ab1 100644
--- a/Makefile.win
+++ b/Makefile.win
@@ -27,7 +27,7 @@ DESTDIR=
 
 # Rebar options
 apps=
-skip_deps=folsom,meck,mochiweb,proper,snappy
+skip_deps=folsom,meck,mochiweb,triq,snappy
 suites=
 tests=
 
diff --git a/rebar.config.script b/rebar.config.script
index 7d9b7feb5..7ae2136bb 100644
--- a/rebar.config.script
+++ b/rebar.config.script
@@ -66,8 +66,9 @@ DepDescs = [
 {mochiweb,         "mochiweb",         {tag, "v2.17.0"}},
 {meck,             "meck",             {tag, "0.8.8"}},
 {bcrypt,           {url, "https://github.com/apache/couchdb-erlang-bcrypt"},
-                   {tag, "1.0.2"}}
-
+                   {tag, "1.0.2"}},
+{triq,             {url, "https://gitlab.com/triq/triq.git"},
+                   "79bd272025434e152745067c36350faa7274c653"}
 ],
 
 
diff --git a/src/couch/test/couch_key_tree_prop_tests.erl b/src/couch/test/couch_key_tree_prop_tests.erl
new file mode 100644
index 000000000..604a8285a
--- /dev/null
+++ b/src/couch/test/couch_key_tree_prop_tests.erl
@@ -0,0 +1,531 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_key_tree_prop_tests).
+
+-include_lib("triq/include/triq.hrl").
+-triq(eunit).
+-include_lib("eunit/include/eunit.hrl").
+
+-define(SIZE_REDUCTION, 3).  % How much to reduce size with tree depth.
+-define(MAX_BRANCHES, 4).  % Maximum number of branches.
+-define(RAND_SIZE, 1 bsl 64).
+
+
+%
+% Properties
+%
+
+
+% Merge random paths from a revtree into itself. Check that no revisions have
+% been lost in the process and that result is one of the 3 expected values.
+%
+prop_revtree_merge_with_subset_of_own_nodes() ->
+    ?FORALL(Revs, g_revs(),
+        ?FORALL({RevTree, Branch}, {g_revtree(Revs), g_revtree(Revs, 1)},
+            ?IMPLIES(length(Branch) > 0 andalso repeating_revs(levels(RevTree ++ Branch)) == [],
+                begin
+                    {Merged, Result} = couch_key_tree:merge(RevTree, hd(Branch)),
+                    lists:member(Result, [new_leaf, new_branch, internal_node])
+                        andalso same_keys(RevTree ++ Branch, Merged)
+                        andalso valid_revtree(Merged)
+                end
+            )
+        )
+    ).
+
+
+% Merge random trees into revtree.
+%
+prop_revtree_merge_random_nodes() ->
+    ?FORALL({RevTree, Branch}, {g_revtree(), g_revtree([], 1)},
+        ?IMPLIES(length(Branch) > 0,
+            begin
+                {Merged, _} = couch_key_tree:merge(RevTree, hd(Branch)),
+                valid_revtree(Merged)
+            end
+        )
+    ).
+
+
+
+% Merge mix or random and existing revtree paths into revtree
+%
+prop_revtree_merge_some_existing_some_new() ->
+    ?FORALL(RevTree, g_revtree(),
+        ?FORALL(Branch,
+            begin
+                KeyList = keylist(RevTree),
+                Half = lists:sublist(KeyList, length(KeyList) div 2),
+                g_revtree(Half, 1)
+            end,
+            ?IMPLIES(length(Branch) > 0 andalso repeating_revs(levels(RevTree ++ Branch)) == [],
+                 begin
+                     {Merged, _} = couch_key_tree:merge(RevTree, hd(Branch)),
+                     valid_revtree(Merged)
+                 end
+            )
+        )
+    ).
+
+
+
+% Stem deeper than the current max level. Expect no changes to the revtree
+%
+prop_no_change_stemming_deeper_than_current_depth() ->
+    ?FORALL(RevTree, g_revtree(),
+        begin
+            StemDepth = depth(RevTree) + 1,
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            StemmedKeys = lists:usort(keylist(Stemmed)),
+            InputKeys = lists:usort(keylist(RevTree)),
+            StemmedKeys == InputKeys
+        end
+    ).
+
+
+% Stem at a random small depth, make sure that resulting tree has
+% unique revisions and the same number or less revisions than input
+%
+prop_stemming_results_in_same_or_less_total_revs() ->
+    ?FORALL({RevTree, StemDepth}, {g_revtree(), choose(1, 20)},
+        begin
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            OldRealDepth = real_depth(RevTree),
+            StemmedKeys = keylist(Stemmed),
+            UniqueStemmedKeys = lists:usort(StemmedKeys),
+            UniqueInputKeys = lists:usort(keylist(RevTree)),
+            NewRealDepth = real_depth(Stemmed),
+            length(StemmedKeys) == length(UniqueStemmedKeys)
+                andalso length(UniqueStemmedKeys) =< length(UniqueInputKeys)
+                andalso OldRealDepth >= NewRealDepth
+        end
+    ).
+
+
+% Generate a longer path (revtree with no branches) then stem it.
+% Always expect it to shrink to stemmed depth.
+prop_stem_path_expect_size_to_get_smaller() ->
+    ?FORALL({RevTree, StemDepth},
+        {
+            ?SIZED(Size, resize(Size * 10, g_revtree([], 1))),
+            choose(1,5)
+        },
+        ?IMPLIES(real_depth(RevTree) > 5,
+            begin
+                Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+                StemmedKeys = lists:usort(keylist(Stemmed)),
+                InputKeys = lists:usort(keylist(RevTree)),
+                length(InputKeys) > length(StemmedKeys)
+                    andalso real_depth(Stemmed) == StemDepth
+            end
+        )
+    ).
+
+
+% After stemming all leaves are still present
+prop_after_stemming_all_leaves_are_present() ->
+    ?FORALL({RevTree, StemDepth},
+        {g_revtree(), choose(1,20)},
+        begin
+            OldRealDepth = real_depth(RevTree),
+            OldLeaves = leaves(RevTree),
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            NewRealDepth = real_depth(Stemmed),
+            NewLeaves = leaves(Stemmed),
+            valid_revtree(Stemmed)
+               andalso OldRealDepth >= NewRealDepth
+               andalso OldLeaves == NewLeaves
+
+        end
+    ).
+
+
+% After stemming paths to root didn't get longer
+prop_after_stemming_paths_are_shorter() ->
+    ?FORALL({StemDepth, RevTree}, {choose(2,10), g_revtree()},
+        begin
+            OldPaths = paths(RevTree),
+            Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+            NewPaths = paths(Stemmed),
+            GrowingPaths = orddict:fold(fun(Rev, Path, Acc) ->
+               OldPath = orddict:fetch(Rev, OldPaths),
+               case length(Path) > length(OldPath) of
+                  true ->
+                       [{Rev, Path, OldPath}| Acc];
+                   false ->
+                       Acc
+               end
+            end, [], NewPaths),
+            valid_revtree(Stemmed) andalso GrowingPaths == []
+        end
+    ).
+
+
+% Check leaf count
+prop_leaf_count() ->
+    ?FORALL(RevTree, g_revtree(),
+        length(leaves(RevTree)) == couch_key_tree:count_leafs(RevTree)
+    ).
+
+
+% Check get leafs
+prop_get_leafs() ->
+    ?FORALL(RevTree, g_revtree(),
+        begin
+            LeafsFull = couch_key_tree:get_all_leafs(RevTree),
+            lists:usort([Rev || {_V, {_D, [Rev | _]}} <- LeafsFull]) == leaves(RevTree)
+        end
+    ).
+
+
+%
+% Generators
+%
+
+% Generate a full rev tree. Most of the forms are just there to set up default
+% parameters, _revtree/3 does all heavy lifting.
+%
+
+g_revtree() ->
+    ?SIZED(Size, g_revtree(Size)).
+
+
+g_revtree(Size) when is_integer(Size) ->
+    g_revtree(Size, [], ?MAX_BRANCHES);
+g_revtree(Revs) when is_list(Revs) ->
+    ?SIZED(Size, g_revtree(Size, Revs, ?MAX_BRANCHES)).
+
+
+g_revtree(Size, Revs) when is_integer(Size), is_list(Revs) ->
+    g_revtree(Size, Revs, ?MAX_BRANCHES);
+g_revtree(Revs, MaxBranches) when is_list(Revs), is_integer(MaxBranches) ->
+    ?SIZED(Size, g_revtree(Size, Revs, MaxBranches)).
+
+
+g_revtree(0, _Revs, _MaxBranches) ->
+    [];
+g_revtree(Size, ERevs, MaxBranches) ->
+    ?LET({Depth, Revs}, {g_stem_depth(Size), g_revs(Size, ERevs)},
+         [{Depth, g_treenode(Size, Revs, MaxBranches)}]
+    ).
+
+
+% Generate a tree node and then recursively generate its children.
+%
+g_treenode(0, Revs, _) ->
+    {elements(Revs), x, []};
+g_treenode(Size, Revs, MaxBranches) ->
+    ?DELAY(?LET(N, int(0, MaxBranches),
+        begin
+            [Rev | ChildRevs] = Revs,
+            {Rev, x, g_nodes(Size div ?SIZE_REDUCTION, N, ChildRevs, MaxBranches)}
+        end
+    )).
+
+
+% Generate a list of child nodes. Depending on how many children there are
+% the pre-generarated revision list is split into that many sublists.
+%
+g_nodes(0, _N, _Revs, _MaxBranches) ->
+    [];
+g_nodes(_Size, 0, _Revs, _MaxBranches) ->
+    [];
+g_nodes(Size, ChildCount, Revs, MaxBranches) ->
+    ?LETSHRINK(
+        ChildNodes,
+        begin
+            ChildRevList = child_revs(ChildCount, Revs, Size, MaxBranches),
+            [g_treenode(Size, ChildRevs, MaxBranches) || ChildRevs <- ChildRevList]
+        end,
+        ordered_nodes(ChildNodes)
+    ).
+
+
+% Generate each subtree's stem depth
+%
+
+
+g_stem_depth(Size) ->
+    choose(0,  expected_height(Size, ?SIZE_REDUCTION) div 2).
+
+
+% Uses the shuffle/1 function to shuffle the input list. Unshuffled list is
+% used as the shrink value.
+%
+g_shuffle(L) when is_list(L) ->
+    triq_dom:domain(g_shuffle,
+        fun(Self, _Size) -> {Self, shuffle(L)} end,
+        fun(Self, _Val) -> {Self, L} end
+     ).
+
+
+% Wrapper to make a list shuffling generator that doesn't shrink
+%
+g_shuffle_noshrink(L) when is_list(L) ->
+    triq_dom:noshrink(g_shuffle(L)).
+
+
+% Generate shuffled sublists up to N items long from a list.
+%
+g_shuffled_sublists(L, N) ->
+    ?LET(Shuffled, g_shuffle_noshrink(L), lists:sublist(Shuffled, N)).
+
+
+% Generate revision lists.
+%
+g_revs() ->
+    ?SIZED(Size, g_revs(Size)).
+
+
+g_revs(Size) when is_integer(Size) ->
+    g_revs(Size, []).
+
+
+g_revs(Size, Existing) when is_integer(Size), is_list(Existing) ->
+    Expected = keys_needed(Size, ?SIZE_REDUCTION, ?MAX_BRANCHES),
+    Revs = revs(Expected, Existing),
+    case length(Revs) > Expected of
+        true -> % have extra, try various sublists
+            g_shuffled_sublists(Revs, Expected);
+        false ->
+            triq_dom:return(Revs)
+    end.
+
+
+%
+% Helper functions
+%
+
+
+valid_revtree(RevTree) ->
+    repeating_revs(levels(RevTree)) == [] andalso children_sorted(RevTree).
+
+
+same_keys(RevTree1, RevTree2) ->
+    Keys1 = lists:usort(keylist(RevTree1)),
+    Keys2 = lists:usort(keylist(RevTree2)),
+    Keys1 == Keys2.
+
+
+all(L) ->
+    lists:all(fun(E) -> E end, L).
+
+% Shufle a list of items. Tag each item with a random number then sort
+% the list and remove the tags.
+%
+shuffle(L) ->
+    Tagged = [{triq_rnd:uniform(), X} || X <- L],
+    [X || {_, X} <- lists:sort(Tagged)].
+
+
+% Generate list of relateively unique large random numbers
+rand_list(N) when N =< 0 ->
+    [];
+rand_list(N) ->
+    [triq_rnd:uniform(?RAND_SIZE) || _ <- lists:seq(1, N)].
+
+
+% Generate a list of revisions to be used as key in revision trees. Expected
+% must the number of maximum expected nodes in a revision tree. Existing is an
+% optional list revisions which must be included in the result. The output list
+% is sorted.
+revs(0, _Existing) ->
+    [];
+revs(Expected, Existing) when is_integer(Expected), is_list(Existing) ->
+    Need = Expected - length(Existing),
+    lists:usort(lists:append(Existing, rand_list(Need))).
+
+
+% Get the list of all the keys in a revision tree. The input can also be a
+% an individual tree (tagged with the depth to virtual root) or a node.
+% Yes, this is not tail recursive but the idea is to keep it simple.
+%
+keylist({_D, Node}) when is_tuple(Node) ->
+    keylist(Node);
+keylist({K, _V, Nodes}) ->
+    [K | keylist(Nodes)];
+keylist(Nodes) ->
+    lists:append([keylist(Node) || Node <- Nodes]).
+
+
+% Get the list of leaves from a revision tree.
+leaves([]) ->
+    [];
+leaves({_D, Node}) when is_tuple(Node) ->
+    leaves(Node);
+leaves({K, _V, []}) ->
+    [K];
+leaves({_K, _V, Nodes}) ->
+    leaves(Nodes);
+leaves(Nodes) ->
+    lists:usort(lists:append([leaves(N) || N <- Nodes])).
+
+
+% Get paths from leaf to root. Result is an orddict of [{LeafRev, [Rev]}]
+%
+paths([]) ->
+    orddict:new();
+paths(RevTree) when is_list(RevTree) ->
+    paths_merge_dicts([paths(T) || T <- RevTree]);
+paths({_Depth, Node}) when is_tuple(Node) ->
+    paths(Node);
+paths({K, _V, []}) ->
+    orddict:store(K, [], orddict:new());
+paths({K, _V, Nodes}) ->
+    CombinedDict = paths_merge_dicts([paths(N) || N <- Nodes]),
+    orddict:map(fun(_LeafKey, Path) -> Path ++ [K] end, CombinedDict).
+
+
+paths_merge_dicts(Dicts) ->
+    lists:foldl(fun(D, AccD) ->
+        orddict:merge(fun(K, V1, V2) ->
+            throw({found_duplicates, K, V1, V2})
+         end, D, AccD)
+    end, orddict:new(), Dicts).
+
+
+% Get lists of all the keys at each depth level. Result is an orddict that
+% looks like [{depth, [key]}]. The depth used here is the "virtual" depth as
+% indicated by the stemmed depth tag that goes with every top level subtree.
+%
+levels([]) ->
+    orddict:new();
+levels(RevTree) when is_list(RevTree) ->
+    lists:foldl(fun(T, Dict) -> levels(T, Dict) end, orddict:new(), RevTree).
+
+
+levels({Depth, Node}, Dict) when is_tuple(Node) ->
+    levels(Node, Depth, Dict).
+
+
+levels({K, _V, Nodes}, Depth, Dict) ->
+    Dict1 = case orddict:is_key(Depth, Dict) of
+        true -> orddict:append(Depth, K, Dict);
+        false -> orddict:store(Depth, [K], Dict)
+    end,
+    levels(Nodes, Depth + 1, Dict1);
+levels(Nodes, Depth, Dict) ->
+    lists:foldl(fun(Node, AccDict) ->
+        levels(Node, Depth, AccDict)
+    end, Dict, Nodes).
+
+
+% Using the output of leaves/1 as input return any repeating revisions if
+% there are any at a particular level. Levels which have not revisions are
+% not returned.
+%
+repeating_revs(Dict) ->
+    orddict:filter(fun(_Depth, Revs) ->
+        length(lists:usort(Revs)) =/= length(Revs)
+    end, Dict).
+
+
+% Check that children of all nodes are sorted
+children_sorted([]) ->
+    true;
+children_sorted(Nodes) when is_list(Nodes) ->
+    all([children_sorted(N) || N <- Nodes]);
+children_sorted({_D, Node}) when is_tuple(Node) ->
+    children_sorted(Node);
+children_sorted({_K, _V, Nodes}) ->
+    children_sorted(Nodes).
+
+
+% Get the maximum depth of a revtree. The depth is "virtual" as it takes into
+% account the distance to the now stemmed root node as indicated by the top
+% level subtrees.
+%
+depth([]) ->
+    0;
+depth(RevTree) when is_list(RevTree) ->
+    lists:max([depth(T) || T <- RevTree]);
+depth({Depth, Node}) when is_tuple(Node) ->
+    depth(Node, Depth - 1).
+
+
+depth({_K, _V, Nodes}, Depth) ->
+    depth(Nodes, Depth + 1);
+depth([], Depth) ->
+    Depth;
+depth(Nodes, Depth) ->
+    lists:max([depth(Node, Depth) || Node <- Nodes]).
+
+
+% Get the "real" tree depth, not the virtual one. As revtrees gets stemmed they
+% will keep their virtual depth but the actual number of nodes in the tree
+% could be reduced.
+%
+real_depth([]) ->
+    0;
+real_depth(RevTree) when is_list(RevTree) ->
+    lists:max([real_depth(T) || T <- RevTree]);
+real_depth({_Depth, Node}) when is_tuple(Node) ->
+    depth(Node, 0).  % Note from here on use the depth/3 function
+
+
+% Return an ordered list of revtree nodes. When sorting only immediate keys
+% (revisions) are looked at and comparison doesn't descent into the treee.
+%
+ordered_nodes(Nodes) ->
+    lists:sort(fun({K1, _, _}, {K2, _, _}) -> K1 =< K2 end, Nodes).
+
+
+% Calculate a maximum number of rev tree nodes needed for a tree of a given
+% height and branchiness. Height is derived from Size and LevelReductionFactor,
+% that is how big the sample should be and quickly the size parameter would
+% shrink on each level.
+%
+keys_needed(0, _, _) ->
+    0;
+keys_needed(Size, LevelReductionFactor, 1) ->
+    expected_height(Size, LevelReductionFactor);
+keys_needed(Size, LevelReductionFactor, Branches) ->
+    Height =  expected_height(Size, LevelReductionFactor),
+    trunc(math:pow(Branches, Height + 1)) + 1.
+
+
+% Calculate expected tree height for a given sample size and branchiness.
+% At each step the size is divided by the reduction factor.
+expected_height(Size, LevelReductionFactor) ->
+    trunc(log(LevelReductionFactor, Size)) + 1.
+
+
+log(B, X) ->
+    math:log(X) / math:log(B).
+
+
+% Distribute items in a list into roughly equal chunks of a given size.
+%
+distribute(_ChunkSize, []) ->
+    [];
+distribute(ChunkSize, L) when ChunkSize >= length(L) ->
+    [L];
+distribute(ChunkSize, L) ->
+    {L1, L2} = lists:split(ChunkSize, L),
+    [L1 | distribute(ChunkSize, L2)].
+
+
+% Split a single (parent) revision list into chunks (sub-lists), one for each
+% child. Also, for safety, double check that at this point in the process the
+% list of revisions is sufficiently large. If it isn't something went wrong and
+% a specific exception is thrown ({not_enough_revisions, Got, Needed}).
+%
+child_revs(ChildCount, Revs, Size, MaxBranches) ->
+    NeedKeys = keys_needed(Size, ?SIZE_REDUCTION, MaxBranches),
+    case length(Revs) >= NeedKeys of
+        true ->
+            ChunkSize = trunc(length(Revs) / ChildCount) + 1,
+            distribute(ChunkSize, Revs);
+        false ->
+            throw({not_enough_revisions, length(Revs), NeedKeys})
+    end.
author	Nick Vatamaniuc <vatamane@apache.org>	2018-02-02 12:08:08 -0500
committer	Nick Vatamaniuc <vatamane@apache.org>	2018-04-03 14:53:09 -0400
commit	716ca59e4b148628e5d69e1c48e467e0f4cb87ac (patch)
tree	5a8732ec1e840661de9ff5b38c8740a760a3d372
parent	948a1311cd6c4a4ad68254f6358afe207ab8a767 (diff)
download	couchdb-property-tests-for-couch-key-tree.tar.gz