diff options
author | Paul J. Davis <paul.joseph.davis@gmail.com> | 2020-03-04 13:11:04 -0600 |
---|---|---|
committer | Paul J. Davis <paul.joseph.davis@gmail.com> | 2020-03-04 13:11:04 -0600 |
commit | 983c4fa33f48a853e191376c47ab28ad8f45f297 (patch) | |
tree | 146e9398af08c645a2887b62f697d300fe62f4f0 | |
parent | 02bae27c75365239e34fb0cff7f85ebd93e9b46f (diff) | |
download | couchdb-prototype/fdb-layer-couch-views-size-tests.tar.gz |
WIP: All tests passing (branch: prototype/fdb-layer-couch-views-size-tests)
-rw-r--r-- | src/couch_views/test/couch_views_size_test.erl | 140 |
1 files changed, 123 insertions, 17 deletions
diff --git a/src/couch_views/test/couch_views_size_test.erl b/src/couch_views/test/couch_views_size_test.erl index 8295006bf..36b039753 100644 --- a/src/couch_views/test/couch_views_size_test.erl +++ b/src/couch_views/test/couch_views_size_test.erl @@ -1,3 +1,9 @@ +%% blargh break compiler +%% +%% Bug fix 1: removing dupes is brokne +%% Bug fix 2: sort keys with fancy unicode are broken +%% Maybe bug 3: Did dbcore sort dupe values by Erlang or JSON comparison? + % Licensed under the Apache License, Version 2.0 (the "License"); you may not % use this file except in compliance with the License. You may obtain a copy of % the License at @@ -58,6 +64,7 @@ % after each one. Index validation will purely look at the existing % state of the index in fdb and validate correctness. +-define(DEFAULT_TEST_PERCENTAGE, 0.1). -define(N_DOMAIN, [0, 1, 2, 5]). -define(D_DOMAIN, [0, 1, 2, 5]). @@ -94,9 +101,9 @@ generate_sets() -> generate_transitions() -> Sets = generate_sets(), - Pairs0 = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets], - Pairs = lists:sublist(Pairs0, 3), - %Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets], + %Pairs0 = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets], + %Pairs = lists:sublist(Pairs0, 3), + Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets], lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) -> Filter = fun(DeltaN, DeltaD, DeltaR) -> % Can't share more keys than the smaller of the @@ -130,8 +137,8 @@ generate_transitions() -> end, % If we have more non-repeated keys in our - % transition then there's no "room" in the target - % set which is not a valid test case. + % transition than there's "room" for in the target + % set it isn't a valid test case. 
TransitionNonRepeats = DeltaN - DeltaD, TargetNonRepeats = N2 - D2, case TransitionNonRepeats > TargetNonRepeats of @@ -146,6 +153,11 @@ generate_transitions() -> permute(NList, DList, RList, Filter) -> + % Technically we could call into Filter in each + % outer loops to conditionally skip inner loops. + % If someone comes along looking to speed up the + % fixture setup time, this would likely be an + % easy win. lists:foldl(fun(N, NAcc) -> lists:foldl(fun(D, DAcc) -> lists:foldl(fun(R, RAcc) -> @@ -188,13 +200,29 @@ cleanup({Ctx, Db}) -> create_transition_tests({_Ctx, Db}) -> - lists:map(fun(T) -> - Name = lists:flatten(io_lib:format("~w", [T])), - {Name, fun() -> check_transition(Db, T) end} - end, lists:sort(generate_transitions())). + Transitions = generate_transitions(), + Single = lists:flatmap(fun(T) -> + case sample_test() of + true -> + Name = lists:flatten(io_lib:format("single ~s", [tname(T)])), + [{Name, fun() -> check_single_transition(Db, T) end}]; + false -> + [] + end + end, lists:sort(Transitions)), + Multi = lists:flatmap(fun(T) -> + case sample_test() of + true -> + Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])), + [{Name, fun() -> check_multi_transition(Db, T) end}]; + false -> + [] + end + end, lists:sort(group(shuffle(Transitions)))), + Single ++ Multi. -check_transition(Db, {Set1, Set2, Transition}) -> +check_single_transition(Db, {Set1, Set2, Transition}) -> clear_views(Db), InitKVs = init_set(Set1, [a, b, c, d, e]), CommonKVs = reduce_set(Transition, InitKVs), @@ -218,6 +246,38 @@ check_transition(Db, {Set1, Set2, Transition}) -> validate_index(Db, Sig, #{DocId => FinalJSONKVs}). 
+check_multi_transition(Db, Transitions) -> + clear_views(Db), + + {Docs, IdMap} = lists:mapfoldl(fun({Set1, Set2, Transition}, IdMapAcc) -> + DocId = couch_uuids:random(), + InitKVs = init_set(Set1, [a, b, c, d, e]), + CommonKVs = reduce_set(Transition, InitKVs), + FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]), + {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}), + {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings), + InitDoc = make_doc(DocId, InitJSONKVs), + FinalDoc = make_doc(DocId, FinalJSONKVs), + {{InitDoc, FinalDoc}, maps:put(DocId, FinalJSONKVs, IdMapAcc)} + end, #{}, Transitions), + + Sig = couch_uuids:random(), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + lists:foreach(fun({InitDoc, _}) -> + couch_views_fdb:write_doc(TxDb, Sig, [1], InitDoc) + end, Docs) + end), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + lists:foreach(fun({_, FinalDoc}) -> + couch_views_fdb:write_doc(TxDb, Sig, [1], FinalDoc) + end, Docs) + end), + + validate_index(Db, Sig, IdMap). + + clear_views(Db) -> fabric2_fdb:transactional(Db, fun(TxDb) -> #{ @@ -241,6 +301,13 @@ validate_index(Db, Sig, ExpectRows) -> erlfdb:get_range(Tx, Start, End) end), + InitAcc = #{ + row_count => 0, + kv_size => 0, + ids => #{}, + rows => [] + }, + MapData = lists:foldl(fun({Key, Value}, Acc) -> case erlfdb_tuple:unpack(Key, DbPrefix) of {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, 1} -> @@ -255,7 +322,8 @@ validate_index(Db, Sig, ExpectRows) -> false = maps:is_key(DocId, Ids), maps:put(DocId, {TotalKeys, TotalSize, UniqueKeys}, Ids) end, Acc); - {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, 1, {EncKey, DocId}, DupeId} -> + {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, 1, MapKey, _DupeId} -> + {EncKey, DocId} = MapKey, {UserKey, UserVal} = erlfdb_tuple:unpack(Value), UserJsonKey = couch_views_encoding:decode(UserKey), @@ -265,10 +333,10 @@ validate_index(Db, Sig, ExpectRows) -> EncKey = couch_views_encoding:encode(UserJsonKey, key), maps:update_with(rows, fun(RAcc) -> - [{DocId, UserJsonKey, 
DupeId, UserJsonVal} | RAcc] + [{DocId, UserJsonKey, UserJsonVal} | RAcc] end, Acc) end - end, #{ids => #{}, rows => []}, Rows), + end, InitAcc, Rows), #{ row_count := RowCount, @@ -281,15 +349,13 @@ validate_index(Db, Sig, ExpectRows) -> {KVAcc + TotalKVs, SAcc + TotalSize} end, {SumKVCount, SumKVSize} = maps:fold(SumFun, {0, 0}, MapIds), - ?debugFmt("~p ~p", [RowCount, length(MapRows)]), - ?debugFmt("~p", [ExpectRows]), - ?debugFmt("~p", [MapRows]), ?assertEqual(RowCount, length(MapRows)), ?assertEqual(RowCount, SumKVCount), ?assertEqual(KVSize, SumKVSize), + ?assert(KVSize >= 0), % Compare the found id entries to our row data - RegenIds = lists:foldl(fun({DocId, JsonKey, DupeId, JsonVal}, Acc) -> + RegenIds = lists:foldl(fun({DocId, JsonKey, JsonVal}, Acc) -> KeySize = erlang:external_size(JsonKey), ValSize = erlang:external_size(JsonVal), Default = {1, KeySize + ValSize, [JsonKey]}, @@ -430,3 +496,43 @@ gen_value() -> _ -> 1 end. + + +sample_test() -> + Freq = case os:getenv("COUCH_VIEWS_SIZE_TEST_SAMPLE") of + false -> ?DEFAULT_TEST_PERCENTAGE; + FreqStr -> + try + list_to_float(FreqStr) + catch _:_ -> + ?DEFAULT_TEST_PERCENTAGE + end + end, + rand:uniform() < Freq. + + +group(Items) -> + case length(Items) > 5 of + true -> + {Group, Rest} = lists:split(5, Items), + [lists:sort(Group) | group(Rest)]; + false when Items == [] -> + []; + false -> + [lists:sort(Items)] + end. + + +shuffle(Items) -> + Tagged = [{rand:uniform(), I} || I <- Items], + Sorted = lists:sort(Tagged), + [I || {_T, I} <- Sorted]. + + +tname([]) -> + []; +tname([Transition | RestTransitions]) -> + [tname(Transition) | tname(RestTransitions)]; +tname({{N1, D1, R1}, {N2, D2, R2}, {DN, DD, DR}}) -> + io_lib:format("~b~b~b~b~b~b~b~b~b", [N1, D1, R1, N2, D2, R2, DN, DD, DR]). + |