summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul J. Davis <paul.joseph.davis@gmail.com> 2017-11-02 12:26:01 -0500
committer: Nick Vatamaniuc <vatamane@apache.org> 2018-01-15 17:37:46 -0500
commit: 15b8f23acd1f296cb6085caaa8bdd17eab884076 (patch)
tree: 9519f612fe0e8fdeac106601b4a03526554a2e10
parent: 08476f7028e7fa13ea0f98f924b059fcdb9dd67a (diff)
download: couchdb-15b8f23acd1f296cb6085caaa8bdd17eab884076.tar.gz
Optimize document updates
This works by delaying the stemming step until all updates to a given document have been processed.
-rw-r--r-- src/couch/src/couch_db_updater.erl 36
1 file changed, 20 insertions, 16 deletions
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index ca61e04c6..0daed9059 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -756,23 +756,24 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
[OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) ->
erlang:put(last_id_merged, OldDocInfo#full_doc_info.id), % for debugging
NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
- merge_rev_tree(OldInfoAcc, NewDoc, Client, Limit, MergeConflicts)
+ merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts)
end, OldDocInfo, NewDocs),
+ NewDocInfo1 = stem_full_doc_info(NewDocInfo0, Limit),
% When MergeConflicts is false, we updated #full_doc_info.deleted on every
% iteration of merge_rev_tree. However, merge_rev_tree does not update
% #full_doc_info.deleted when MergeConflicts is true, since we don't need
% to know whether the doc is deleted between iterations. Since we still
% need to know if the doc is deleted after the merge happens, we have to
% set it here.
- NewDocInfo1 = case MergeConflicts of
+ NewDocInfo2 = case MergeConflicts of
true ->
- NewDocInfo0#full_doc_info{
- deleted = couch_doc:is_deleted(NewDocInfo0)
+ NewDocInfo1#full_doc_info{
+ deleted = couch_doc:is_deleted(NewDocInfo1)
};
false ->
- NewDocInfo0
+ NewDocInfo1
end,
- if NewDocInfo1 == OldDocInfo ->
+ if NewDocInfo2 == OldDocInfo ->
% nothing changed
merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
AccNewInfos, AccRemoveSeqs, AccSeq);
@@ -781,7 +782,7 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
% important to note that the update_seq on OldDocInfo should
% be identical to the value on NewDocInfo1.
OldSeq = OldDocInfo#full_doc_info.update_seq,
- NewDocInfo2 = NewDocInfo1#full_doc_info{
+ NewDocInfo3 = NewDocInfo2#full_doc_info{
update_seq = AccSeq + 1
},
RemoveSeqs = case OldSeq of
@@ -789,10 +790,10 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
_ -> [OldSeq | AccRemoveSeqs]
end,
merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
- [NewDocInfo2|AccNewInfos], RemoveSeqs, AccSeq+1)
+ [NewDocInfo3|AccNewInfos], RemoveSeqs, AccSeq+1)
end.
-merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
+merge_rev_tree(OldInfo, NewDoc, Client, false)
when OldInfo#full_doc_info.deleted ->
% We're recreating a document that was previously
% deleted. To check that this is a recreation from
@@ -816,7 +817,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
% Merge our modified new doc into the tree
#full_doc_info{rev_tree=OldTree} = OldInfo,
NewTree0 = couch_doc:to_path(NewDoc2),
- case couch_key_tree:merge(OldTree, NewTree0, Limit) of
+ case couch_key_tree:merge(OldTree, NewTree0) of
{NewTree1, new_leaf} ->
% We changed the revision id so inform the caller
send_result(Client, NewDoc, {ok, {OldPos+1, NewRevId}}),
@@ -831,7 +832,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
send_result(Client, NewDoc, conflict),
OldInfo
end;
-merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) ->
+merge_rev_tree(OldInfo, NewDoc, Client, false) ->
% We're attempting to merge a new revision into an
% undeleted document. To not be a conflict we require
% that the merge results in extending a branch.
@@ -839,7 +840,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) ->
OldTree = OldInfo#full_doc_info.rev_tree,
NewTree0 = couch_doc:to_path(NewDoc),
NewDeleted = NewDoc#doc.deleted,
- case couch_key_tree:merge(OldTree, NewTree0, Limit) of
+ case couch_key_tree:merge(OldTree, NewTree0) of
{NewTree, new_leaf} when not NewDeleted ->
OldInfo#full_doc_info{
rev_tree = NewTree,
@@ -857,17 +858,20 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) ->
send_result(Client, NewDoc, conflict),
OldInfo
end;
-merge_rev_tree(OldInfo, NewDoc, _Client, Limit, true) ->
+merge_rev_tree(OldInfo, NewDoc, _Client, true) ->
% We're merging in revisions without caring about
% conflicts. Most likely this is a replication update.
OldTree = OldInfo#full_doc_info.rev_tree,
NewTree0 = couch_doc:to_path(NewDoc),
- {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0, Limit),
+ {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0),
OldInfo#full_doc_info{rev_tree = NewTree}.
+stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) ->
+ Stemmed = couch_key_tree:stem(Tree, Limit),
+ Info#full_doc_info{rev_tree = Stemmed}.
+
stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
- [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} ||
- #full_doc_info{rev_tree=Tree}=Info <- DocInfos].
+ lists:map(fun(FDI) -> stem_full_doc_info(FDI, Limit) end, DocInfos).
update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
#db{