summaryrefslogtreecommitdiff
path: root/src/couch/test/couch_key_tree_prop_tests.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/couch/test/couch_key_tree_prop_tests.erl')
-rw-r--r--src/couch/test/couch_key_tree_prop_tests.erl531
1 files changed, 531 insertions, 0 deletions
diff --git a/src/couch/test/couch_key_tree_prop_tests.erl b/src/couch/test/couch_key_tree_prop_tests.erl
new file mode 100644
index 000000000..604a8285a
--- /dev/null
+++ b/src/couch/test/couch_key_tree_prop_tests.erl
@@ -0,0 +1,531 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_key_tree_prop_tests).
+
+-include_lib("triq/include/triq.hrl").
+-triq(eunit).
+-include_lib("eunit/include/eunit.hrl").
+
+-define(SIZE_REDUCTION, 3). % How much to reduce size with tree depth.
+-define(MAX_BRANCHES, 4). % Maximum number of branches.
+-define(RAND_SIZE, 1 bsl 64).
+
+
+%
+% Properties
+%
+
+
+% Merge random paths from a revtree into itself. Check that no revisions have
+% been lost in the process and that result is one of the 3 expected values.
+%
+prop_revtree_merge_with_subset_of_own_nodes() ->
+ ?FORALL(Revs, g_revs(),
+ ?FORALL({RevTree, Branch}, {g_revtree(Revs), g_revtree(Revs, 1)},
+ ?IMPLIES(length(Branch) > 0 andalso repeating_revs(levels(RevTree ++ Branch)) == [],
+ begin
+ {Merged, Result} = couch_key_tree:merge(RevTree, hd(Branch)),
+ lists:member(Result, [new_leaf, new_branch, internal_node])
+ andalso same_keys(RevTree ++ Branch, Merged)
+ andalso valid_revtree(Merged)
+ end
+ )
+ )
+ ).
+
+
+% Merge random trees into revtree.
+%
+prop_revtree_merge_random_nodes() ->
+ ?FORALL({RevTree, Branch}, {g_revtree(), g_revtree([], 1)},
+ ?IMPLIES(length(Branch) > 0,
+ begin
+ {Merged, _} = couch_key_tree:merge(RevTree, hd(Branch)),
+ valid_revtree(Merged)
+ end
+ )
+ ).
+
+
+
+% Merge mix or random and existing revtree paths into revtree
+%
+prop_revtree_merge_some_existing_some_new() ->
+ ?FORALL(RevTree, g_revtree(),
+ ?FORALL(Branch,
+ begin
+ KeyList = keylist(RevTree),
+ Half = lists:sublist(KeyList, length(KeyList) div 2),
+ g_revtree(Half, 1)
+ end,
+ ?IMPLIES(length(Branch) > 0 andalso repeating_revs(levels(RevTree ++ Branch)) == [],
+ begin
+ {Merged, _} = couch_key_tree:merge(RevTree, hd(Branch)),
+ valid_revtree(Merged)
+ end
+ )
+ )
+ ).
+
+
+
+% Stem deeper than the current max level. Expect no changes to the revtree
+%
+prop_no_change_stemming_deeper_than_current_depth() ->
+ ?FORALL(RevTree, g_revtree(),
+ begin
+ StemDepth = depth(RevTree) + 1,
+ Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+ StemmedKeys = lists:usort(keylist(Stemmed)),
+ InputKeys = lists:usort(keylist(RevTree)),
+ StemmedKeys == InputKeys
+ end
+ ).
+
+
+% Stem at a random small depth, make sure that resulting tree has
+% unique revisions and the same number or less revisions than input
+%
+prop_stemming_results_in_same_or_less_total_revs() ->
+ ?FORALL({RevTree, StemDepth}, {g_revtree(), choose(1, 20)},
+ begin
+ Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+ OldRealDepth = real_depth(RevTree),
+ StemmedKeys = keylist(Stemmed),
+ UniqueStemmedKeys = lists:usort(StemmedKeys),
+ UniqueInputKeys = lists:usort(keylist(RevTree)),
+ NewRealDepth = real_depth(Stemmed),
+ length(StemmedKeys) == length(UniqueStemmedKeys)
+ andalso length(UniqueStemmedKeys) =< length(UniqueInputKeys)
+ andalso OldRealDepth >= NewRealDepth
+ end
+ ).
+
+
+% Generate a longer path (revtree with no branches) then stem it.
+% Always expect it to shrink to stemmed depth.
+prop_stem_path_expect_size_to_get_smaller() ->
+ ?FORALL({RevTree, StemDepth},
+ {
+ ?SIZED(Size, resize(Size * 10, g_revtree([], 1))),
+ choose(1,5)
+ },
+ ?IMPLIES(real_depth(RevTree) > 5,
+ begin
+ Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+ StemmedKeys = lists:usort(keylist(Stemmed)),
+ InputKeys = lists:usort(keylist(RevTree)),
+ length(InputKeys) > length(StemmedKeys)
+ andalso real_depth(Stemmed) == StemDepth
+ end
+ )
+ ).
+
+
+% After stemming all leaves are still present
+prop_after_stemming_all_leaves_are_present() ->
+ ?FORALL({RevTree, StemDepth},
+ {g_revtree(), choose(1,20)},
+ begin
+ OldRealDepth = real_depth(RevTree),
+ OldLeaves = leaves(RevTree),
+ Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+ NewRealDepth = real_depth(Stemmed),
+ NewLeaves = leaves(Stemmed),
+ valid_revtree(Stemmed)
+ andalso OldRealDepth >= NewRealDepth
+ andalso OldLeaves == NewLeaves
+
+ end
+ ).
+
+
+% After stemming paths to root didn't get longer
+prop_after_stemming_paths_are_shorter() ->
+ ?FORALL({StemDepth, RevTree}, {choose(2,10), g_revtree()},
+ begin
+ OldPaths = paths(RevTree),
+ Stemmed = couch_key_tree:stem(RevTree, StemDepth),
+ NewPaths = paths(Stemmed),
+ GrowingPaths = orddict:fold(fun(Rev, Path, Acc) ->
+ OldPath = orddict:fetch(Rev, OldPaths),
+ case length(Path) > length(OldPath) of
+ true ->
+ [{Rev, Path, OldPath}| Acc];
+ false ->
+ Acc
+ end
+ end, [], NewPaths),
+ valid_revtree(Stemmed) andalso GrowingPaths == []
+ end
+ ).
+
+
+% Check leaf count
+prop_leaf_count() ->
+ ?FORALL(RevTree, g_revtree(),
+ length(leaves(RevTree)) == couch_key_tree:count_leafs(RevTree)
+ ).
+
+
+% Check get leafs
+prop_get_leafs() ->
+ ?FORALL(RevTree, g_revtree(),
+ begin
+ LeafsFull = couch_key_tree:get_all_leafs(RevTree),
+ lists:usort([Rev || {_V, {_D, [Rev | _]}} <- LeafsFull]) == leaves(RevTree)
+ end
+ ).
+
+
+%
+% Generators
+%
+
+% Generate a full rev tree. Most of the forms are just there to set up default
+% parameters, _revtree/3 does all heavy lifting.
+%
+
+g_revtree() ->
+ ?SIZED(Size, g_revtree(Size)).
+
+
+g_revtree(Size) when is_integer(Size) ->
+ g_revtree(Size, [], ?MAX_BRANCHES);
+g_revtree(Revs) when is_list(Revs) ->
+ ?SIZED(Size, g_revtree(Size, Revs, ?MAX_BRANCHES)).
+
+
+g_revtree(Size, Revs) when is_integer(Size), is_list(Revs) ->
+ g_revtree(Size, Revs, ?MAX_BRANCHES);
+g_revtree(Revs, MaxBranches) when is_list(Revs), is_integer(MaxBranches) ->
+ ?SIZED(Size, g_revtree(Size, Revs, MaxBranches)).
+
+
+g_revtree(0, _Revs, _MaxBranches) ->
+ [];
+g_revtree(Size, ERevs, MaxBranches) ->
+ ?LET({Depth, Revs}, {g_stem_depth(Size), g_revs(Size, ERevs)},
+ [{Depth, g_treenode(Size, Revs, MaxBranches)}]
+ ).
+
+
+% Generate a tree node and then recursively generate its children.
+%
+g_treenode(0, Revs, _) ->
+ {elements(Revs), x, []};
+g_treenode(Size, Revs, MaxBranches) ->
+ ?DELAY(?LET(N, int(0, MaxBranches),
+ begin
+ [Rev | ChildRevs] = Revs,
+ {Rev, x, g_nodes(Size div ?SIZE_REDUCTION, N, ChildRevs, MaxBranches)}
+ end
+ )).
+
+
+% Generate a list of child nodes. Depending on how many children there are
+% the pre-generarated revision list is split into that many sublists.
+%
+g_nodes(0, _N, _Revs, _MaxBranches) ->
+ [];
+g_nodes(_Size, 0, _Revs, _MaxBranches) ->
+ [];
+g_nodes(Size, ChildCount, Revs, MaxBranches) ->
+ ?LETSHRINK(
+ ChildNodes,
+ begin
+ ChildRevList = child_revs(ChildCount, Revs, Size, MaxBranches),
+ [g_treenode(Size, ChildRevs, MaxBranches) || ChildRevs <- ChildRevList]
+ end,
+ ordered_nodes(ChildNodes)
+ ).
+
+
+% Generate each subtree's stem depth
+%
+
+
+g_stem_depth(Size) ->
+ choose(0, expected_height(Size, ?SIZE_REDUCTION) div 2).
+
+
+% Uses the shuffle/1 function to shuffle the input list. Unshuffled list is
+% used as the shrink value.
+%
+g_shuffle(L) when is_list(L) ->
+ triq_dom:domain(g_shuffle,
+ fun(Self, _Size) -> {Self, shuffle(L)} end,
+ fun(Self, _Val) -> {Self, L} end
+ ).
+
+
+% Wrapper to make a list shuffling generator that doesn't shrink
+%
+g_shuffle_noshrink(L) when is_list(L) ->
+ triq_dom:noshrink(g_shuffle(L)).
+
+
+% Generate shuffled sublists up to N items long from a list.
+%
+g_shuffled_sublists(L, N) ->
+ ?LET(Shuffled, g_shuffle_noshrink(L), lists:sublist(Shuffled, N)).
+
+
+% Generate revision lists.
+%
+g_revs() ->
+ ?SIZED(Size, g_revs(Size)).
+
+
+g_revs(Size) when is_integer(Size) ->
+ g_revs(Size, []).
+
+
+g_revs(Size, Existing) when is_integer(Size), is_list(Existing) ->
+ Expected = keys_needed(Size, ?SIZE_REDUCTION, ?MAX_BRANCHES),
+ Revs = revs(Expected, Existing),
+ case length(Revs) > Expected of
+ true -> % have extra, try various sublists
+ g_shuffled_sublists(Revs, Expected);
+ false ->
+ triq_dom:return(Revs)
+ end.
+
+
+%
+% Helper functions
+%
+
+
+valid_revtree(RevTree) ->
+ repeating_revs(levels(RevTree)) == [] andalso children_sorted(RevTree).
+
+
+same_keys(RevTree1, RevTree2) ->
+ Keys1 = lists:usort(keylist(RevTree1)),
+ Keys2 = lists:usort(keylist(RevTree2)),
+ Keys1 == Keys2.
+
+
+all(L) ->
+ lists:all(fun(E) -> E end, L).
+
+% Shufle a list of items. Tag each item with a random number then sort
+% the list and remove the tags.
+%
+shuffle(L) ->
+ Tagged = [{triq_rnd:uniform(), X} || X <- L],
+ [X || {_, X} <- lists:sort(Tagged)].
+
+
+% Generate list of relateively unique large random numbers
+rand_list(N) when N =< 0 ->
+ [];
+rand_list(N) ->
+ [triq_rnd:uniform(?RAND_SIZE) || _ <- lists:seq(1, N)].
+
+
+% Generate a list of revisions to be used as key in revision trees. Expected
+% must the number of maximum expected nodes in a revision tree. Existing is an
+% optional list revisions which must be included in the result. The output list
+% is sorted.
+revs(0, _Existing) ->
+ [];
+revs(Expected, Existing) when is_integer(Expected), is_list(Existing) ->
+ Need = Expected - length(Existing),
+ lists:usort(lists:append(Existing, rand_list(Need))).
+
+
+% Get the list of all the keys in a revision tree. The input can also be a
+% an individual tree (tagged with the depth to virtual root) or a node.
+% Yes, this is not tail recursive but the idea is to keep it simple.
+%
+keylist({_D, Node}) when is_tuple(Node) ->
+ keylist(Node);
+keylist({K, _V, Nodes}) ->
+ [K | keylist(Nodes)];
+keylist(Nodes) ->
+ lists:append([keylist(Node) || Node <- Nodes]).
+
+
+% Get the list of leaves from a revision tree.
+leaves([]) ->
+ [];
+leaves({_D, Node}) when is_tuple(Node) ->
+ leaves(Node);
+leaves({K, _V, []}) ->
+ [K];
+leaves({_K, _V, Nodes}) ->
+ leaves(Nodes);
+leaves(Nodes) ->
+ lists:usort(lists:append([leaves(N) || N <- Nodes])).
+
+
+% Get paths from leaf to root. Result is an orddict of [{LeafRev, [Rev]}]
+%
+paths([]) ->
+ orddict:new();
+paths(RevTree) when is_list(RevTree) ->
+ paths_merge_dicts([paths(T) || T <- RevTree]);
+paths({_Depth, Node}) when is_tuple(Node) ->
+ paths(Node);
+paths({K, _V, []}) ->
+ orddict:store(K, [], orddict:new());
+paths({K, _V, Nodes}) ->
+ CombinedDict = paths_merge_dicts([paths(N) || N <- Nodes]),
+ orddict:map(fun(_LeafKey, Path) -> Path ++ [K] end, CombinedDict).
+
+
+paths_merge_dicts(Dicts) ->
+ lists:foldl(fun(D, AccD) ->
+ orddict:merge(fun(K, V1, V2) ->
+ throw({found_duplicates, K, V1, V2})
+ end, D, AccD)
+ end, orddict:new(), Dicts).
+
+
+% Get lists of all the keys at each depth level. Result is an orddict that
+% looks like [{depth, [key]}]. The depth used here is the "virtual" depth as
+% indicated by the stemmed depth tag that goes with every top level subtree.
+%
+levels([]) ->
+ orddict:new();
+levels(RevTree) when is_list(RevTree) ->
+ lists:foldl(fun(T, Dict) -> levels(T, Dict) end, orddict:new(), RevTree).
+
+
+levels({Depth, Node}, Dict) when is_tuple(Node) ->
+ levels(Node, Depth, Dict).
+
+
+levels({K, _V, Nodes}, Depth, Dict) ->
+ Dict1 = case orddict:is_key(Depth, Dict) of
+ true -> orddict:append(Depth, K, Dict);
+ false -> orddict:store(Depth, [K], Dict)
+ end,
+ levels(Nodes, Depth + 1, Dict1);
+levels(Nodes, Depth, Dict) ->
+ lists:foldl(fun(Node, AccDict) ->
+ levels(Node, Depth, AccDict)
+ end, Dict, Nodes).
+
+
+% Using the output of leaves/1 as input return any repeating revisions if
+% there are any at a particular level. Levels which have not revisions are
+% not returned.
+%
+repeating_revs(Dict) ->
+ orddict:filter(fun(_Depth, Revs) ->
+ length(lists:usort(Revs)) =/= length(Revs)
+ end, Dict).
+
+
+% Check that children of all nodes are sorted
+children_sorted([]) ->
+ true;
+children_sorted(Nodes) when is_list(Nodes) ->
+ all([children_sorted(N) || N <- Nodes]);
+children_sorted({_D, Node}) when is_tuple(Node) ->
+ children_sorted(Node);
+children_sorted({_K, _V, Nodes}) ->
+ children_sorted(Nodes).
+
+
+% Get the maximum depth of a revtree. The depth is "virtual" as it takes into
+% account the distance to the now stemmed root node as indicated by the top
+% level subtrees.
+%
+depth([]) ->
+ 0;
+depth(RevTree) when is_list(RevTree) ->
+ lists:max([depth(T) || T <- RevTree]);
+depth({Depth, Node}) when is_tuple(Node) ->
+ depth(Node, Depth - 1).
+
+
+depth({_K, _V, Nodes}, Depth) ->
+ depth(Nodes, Depth + 1);
+depth([], Depth) ->
+ Depth;
+depth(Nodes, Depth) ->
+ lists:max([depth(Node, Depth) || Node <- Nodes]).
+
+
+% Get the "real" tree depth, not the virtual one. As revtrees gets stemmed they
+% will keep their virtual depth but the actual number of nodes in the tree
+% could be reduced.
+%
+real_depth([]) ->
+ 0;
+real_depth(RevTree) when is_list(RevTree) ->
+ lists:max([real_depth(T) || T <- RevTree]);
+real_depth({_Depth, Node}) when is_tuple(Node) ->
+ depth(Node, 0). % Note from here on use the depth/3 function
+
+
+% Return an ordered list of revtree nodes. When sorting only immediate keys
+% (revisions) are looked at and comparison doesn't descent into the treee.
+%
+ordered_nodes(Nodes) ->
+ lists:sort(fun({K1, _, _}, {K2, _, _}) -> K1 =< K2 end, Nodes).
+
+
+% Calculate a maximum number of rev tree nodes needed for a tree of a given
+% height and branchiness. Height is derived from Size and LevelReductionFactor,
+% that is how big the sample should be and quickly the size parameter would
+% shrink on each level.
+%
+keys_needed(0, _, _) ->
+ 0;
+keys_needed(Size, LevelReductionFactor, 1) ->
+ expected_height(Size, LevelReductionFactor);
+keys_needed(Size, LevelReductionFactor, Branches) ->
+ Height = expected_height(Size, LevelReductionFactor),
+ trunc(math:pow(Branches, Height + 1)) + 1.
+
+
+% Calculate expected tree height for a given sample size and branchiness.
+% At each step the size is divided by the reduction factor.
+expected_height(Size, LevelReductionFactor) ->
+ trunc(log(LevelReductionFactor, Size)) + 1.
+
+
+log(B, X) ->
+ math:log(X) / math:log(B).
+
+
+% Distribute items in a list into roughly equal chunks of a given size.
+%
+distribute(_ChunkSize, []) ->
+ [];
+distribute(ChunkSize, L) when ChunkSize >= length(L) ->
+ [L];
+distribute(ChunkSize, L) ->
+ {L1, L2} = lists:split(ChunkSize, L),
+ [L1 | distribute(ChunkSize, L2)].
+
+
+% Split a single (parent) revision list into chunks (sub-lists), one for each
+% child. Also, for safety, double check that at this point in the process the
+% list of revisions is sufficiently large. If it isn't something went wrong and
+% a specific exception is thrown ({not_enough_revisions, Got, Needed}).
+%
+child_revs(ChildCount, Revs, Size, MaxBranches) ->
+ NeedKeys = keys_needed(Size, ?SIZE_REDUCTION, MaxBranches),
+ case length(Revs) >= NeedKeys of
+ true ->
+ ChunkSize = trunc(length(Revs) / ChildCount) + 1,
+ distribute(ChunkSize, Revs);
+ false ->
+ throw({not_enough_revisions, length(Revs), NeedKeys})
+ end.