summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2022-06-08 12:25:24 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2022-06-10 10:16:54 -0400
commitd2b233ec0e04869394b03d5344aac99cfc28e1ab (patch)
tree3e7929bc95af116c7c4354973241c1420ee3d197
parent35a8cbf7a5b3bf3c8e9e30182ee95e3a10b5ac5d (diff)
downloadcouchdb-d2b233ec0e04869394b03d5344aac99cfc28e1ab.tar.gz
Optimize couch_key_tree:find_missing/2
Previously we always fully traversed the search keys list, when we partitioned it into possible and impossible sublists. The optimization is we can stop traversing early if the search keys are sorted as soon as we hit a key that is >= than the current position in the tree. To keep things simple we can use the lists:splitwith/2 function which seems to do what we want [1]. [1] https://github.com/erlang/otp/blob/40922798411c2d23ee8a99456f96d6637c62b762/lib/stdlib/src/lists.erl#L1426-L1435
-rw-r--r--src/couch/src/couch_key_tree.erl32
1 files changed, 16 insertions, 16 deletions
diff --git a/src/couch/src/couch_key_tree.erl b/src/couch/src/couch_key_tree.erl
index ff294c5d3..5bc37bb62 100644
--- a/src/couch/src/couch_key_tree.erl
+++ b/src/couch/src/couch_key_tree.erl
@@ -209,27 +209,27 @@ merge_extend([Tree | RestA], NextB) ->
end,
{[Tree | Merged], Result}.
-find_missing(_Tree, []) ->
+find_missing(Tree, SearchKeys) ->
+ find_missing_sorted(Tree, lists:sort(SearchKeys)).
+
+find_missing_sorted(_Tree, []) ->
[];
-find_missing([], SeachKeys) ->
- SeachKeys;
-find_missing([{Start, {Key, Value, SubTree}} | RestTree], SeachKeys) ->
- PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Start],
- ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Start],
- Missing = find_missing_simple(Start, [{Key, Value, SubTree}], PossibleKeys),
- find_missing(RestTree, ImpossibleKeys ++ Missing).
+find_missing_sorted([], SearchKeys) ->
+ SearchKeys;
+find_missing_sorted([{Start, {Key, Value, SubTree}} | RestTree], SearchKeys) ->
+ {Impossible, Possible} = lists:splitwith(fun({K, _}) -> K < Start end, SearchKeys),
+ Missing = find_missing_simple(Start, [{Key, Value, SubTree}], Possible),
+ find_missing_sorted(RestTree, Impossible ++ Missing).
find_missing_simple(_Pos, _Tree, []) ->
[];
-find_missing_simple(_Pos, [], SeachKeys) ->
- SeachKeys;
-find_missing_simple(Pos, [{Key, _, SubTree} | RestTree], SeachKeys) ->
- PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Pos],
- ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Pos],
-
- SrcKeys2 = PossibleKeys -- [{Pos, Key}],
+find_missing_simple(_Pos, [], SearchKeys) ->
+ SearchKeys;
+find_missing_simple(Pos, [{Key, _, SubTree} | RestTree], SearchKeys) ->
+ {Impossible, Possible} = lists:splitwith(fun({K, _}) -> K < Pos end, SearchKeys),
+ SrcKeys2 = Possible -- [{Pos, Key}],
SrcKeys3 = find_missing_simple(Pos + 1, SubTree, SrcKeys2),
- ImpossibleKeys ++ find_missing_simple(Pos, RestTree, SrcKeys3).
+ Impossible ++ find_missing_simple(Pos, RestTree, SrcKeys3).
filter_leafs([], _Keys, FilteredAcc, RemovedKeysAcc) ->
{FilteredAcc, RemovedKeysAcc};