summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2022-06-10 16:33:04 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2022-06-11 14:02:25 -0400
commit53adf72f48b958c0c85756c3e83d16e60657e5d2 (patch)
treede5a38c442b4c968f34ae1404a06a5b5bc79ba58
parentadde37ab813b2ae39c25d891b646191498b35872 (diff)
downloadcouchdb-53adf72f48b958c0c85756c3e83d16e60657e5d2.tar.gz
Reduce complexity of "possible_ancestors" from quadratic to linear.
"Possible ancestors" are all the leaf revisions with a position less than any of the missing revisions. Previously, for every leaf revision we potentially checked every missing revision. And with missing revisions list being sorted from smallest to largest, that meant often traversing most of the missing revisions list. In general, it meant performing O(R * M) operations, where R is the number of leaf revision, and M is the number of missing revisions. The optimization is instead of comparing against every single missing revision we can compare only against the highest one. If a leaf revision is less than highest missing revison, then it is already a possible ancestor, so we can stop checking. Thus, we can go from a quadratic O(R * M) to an O(R) + O(M) complexity, which, when merging large conflicting documents could give us a nice performance boost.
-rw-r--r--src/chttpd/test/eunit/chttpd_revs_diff_tests.erl29
-rw-r--r--src/couch/src/couch_db.erl29
2 files changed, 24 insertions, 34 deletions
diff --git a/src/chttpd/test/eunit/chttpd_revs_diff_tests.erl b/src/chttpd/test/eunit/chttpd_revs_diff_tests.erl
index 9a9bd25b7..7cdb81fa2 100644
--- a/src/chttpd/test/eunit/chttpd_revs_diff_tests.erl
+++ b/src/chttpd/test/eunit/chttpd_revs_diff_tests.erl
@@ -124,18 +124,23 @@ t_revs_diff_some_missing_some_not({Top, Db}) ->
},
{Code, Res} = req(post, Top ++ Db ++ "/_revs_diff", Body),
?assertEqual(200, Code),
- ?assertEqual(
- #{
- ?DOC1 => #{
- <<"missing">> => [<<"1-xyz">>, <<"2-def">>, <<"3-klm">>],
- <<"possible_ancestors">> => [<<"2-revb">>, <<"2-revc">>]
- },
- ?DOC2 => #{
- <<"missing">> => [<<"1-pqr">>]
- }
- },
- Res
- ).
+
+ ?assert(maps:is_key(?DOC1, Res)),
+ ?assert(maps:is_key(?DOC2, Res)),
+ #{?DOC1 := Doc1, ?DOC2 := Doc2} = Res,
+
+ ?assert(maps:is_key(<<"missing">>, Doc1)),
+ ?assert(maps:is_key(<<"missing">>, Doc2)),
+ #{<<"missing">> := Missing1} = Doc1,
+ #{<<"missing">> := Missing2} = Doc2,
+ ?assertEqual([<<"1-xyz">>, <<"2-def">>, <<"3-klm">>], lists:sort(Missing1)),
+ ?assertEqual([<<"1-pqr">>], Missing2),
+
+ ?assert(maps:is_key(<<"possible_ancestors">>, Doc1)),
+ ?assert(not maps:is_key(<<"possible_ancestors">>, Doc2)),
+
+ #{<<"possible_ancestors">> := PAs1} = Doc1,
+ ?assertEqual([<<"2-revb">>, <<"2-revc">>], lists:sort(PAs1)).
t_empty_missing_revs({Top, Db}) ->
{Code, Res} = req(post, Top ++ Db ++ "/_missing_revs", #{}),
diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index 572746229..70ba1c2b9 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -2102,28 +2102,13 @@ possible_ancestors(_FullInfo, []) ->
possible_ancestors(FullInfo, MissingRevs) ->
#doc_info{revs = RevsInfo} = couch_doc:to_doc_info(FullInfo),
LeafRevs = [Rev || #rev_info{rev = Rev} <- RevsInfo],
- % Find the revs that are possible parents of this rev
- lists:foldl(
- fun({LeafPos, LeafRevId}, Acc) ->
- % this leaf is a "possible ancenstor" of the missing
- % revs if this LeafPos lessthan any of the missing revs
- case
- lists:any(
- fun({MissingPos, _}) ->
- LeafPos < MissingPos
- end,
- MissingRevs
- )
- of
- true ->
- [{LeafPos, LeafRevId} | Acc];
- false ->
- Acc
- end
- end,
- [],
- LeafRevs
- ).
+ % Find the revs that are possible ancestors of this rev. A leaf is
+ % a possible ancestor if its position is less than any of the
+ % missing revs, and if it is less than any, it means it is also
+ % less than the maximum missing rev, so we just compare against
+ % that.
+ {MaxMissingPos, _} = lists:max(MissingRevs),
+ lists:filter(fun({Pos, _}) -> Pos < MaxMissingPos end, LeafRevs).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").