diff options
author | Paul J. Davis <paul.joseph.davis@gmail.com> | 2017-11-02 12:26:01 -0500 |
---|---|---|
committer | Nick Vatamaniuc <vatamane@apache.org> | 2018-01-15 17:37:46 -0500 |
commit | 15b8f23acd1f296cb6085caaa8bdd17eab884076 (patch) | |
tree | 9519f612fe0e8fdeac106601b4a03526554a2e10 | |
parent | 08476f7028e7fa13ea0f98f924b059fcdb9dd67a (diff) | |
download | couchdb-15b8f23acd1f296cb6085caaa8bdd17eab884076.tar.gz |
Optimize document updates
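This works by delaying the stemming step until all updates to a given
document have been processed. Instead of passing the revision limit to
every couch_key_tree:merge call, each new revision is merged without a
limit and the resulting tree is stemmed once per document via
stem_full_doc_info/2.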
-rw-r--r-- | src/couch/src/couch_db_updater.erl | 36 |
1 file changed, 20 insertions, 16 deletions
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index ca61e04c6..0daed9059 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -756,23 +756,24 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) -> erlang:put(last_id_merged, OldDocInfo#full_doc_info.id), % for debugging NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) -> - merge_rev_tree(OldInfoAcc, NewDoc, Client, Limit, MergeConflicts) + merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts) end, OldDocInfo, NewDocs), + NewDocInfo1 = stem_full_doc_info(NewDocInfo0, Limit), % When MergeConflicts is false, we updated #full_doc_info.deleted on every % iteration of merge_rev_tree. However, merge_rev_tree does not update % #full_doc_info.deleted when MergeConflicts is true, since we don't need % to know whether the doc is deleted between iterations. Since we still % need to know if the doc is deleted after the merge happens, we have to % set it here. - NewDocInfo1 = case MergeConflicts of + NewDocInfo2 = case MergeConflicts of true -> - NewDocInfo0#full_doc_info{ - deleted = couch_doc:is_deleted(NewDocInfo0) + NewDocInfo1#full_doc_info{ + deleted = couch_doc:is_deleted(NewDocInfo1) }; false -> - NewDocInfo0 + NewDocInfo1 end, - if NewDocInfo1 == OldDocInfo -> + if NewDocInfo2 == OldDocInfo -> % nothing changed merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, AccNewInfos, AccRemoveSeqs, AccSeq); @@ -781,7 +782,7 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], % important to note that the update_seq on OldDocInfo should % be identical to the value on NewDocInfo1. 
OldSeq = OldDocInfo#full_doc_info.update_seq, - NewDocInfo2 = NewDocInfo1#full_doc_info{ + NewDocInfo3 = NewDocInfo2#full_doc_info{ update_seq = AccSeq + 1 }, RemoveSeqs = case OldSeq of @@ -789,10 +790,10 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], _ -> [OldSeq | AccRemoveSeqs] end, merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, - [NewDocInfo2|AccNewInfos], RemoveSeqs, AccSeq+1) + [NewDocInfo3|AccNewInfos], RemoveSeqs, AccSeq+1) end. -merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) +merge_rev_tree(OldInfo, NewDoc, Client, false) when OldInfo#full_doc_info.deleted -> % We're recreating a document that was previously % deleted. To check that this is a recreation from @@ -816,7 +817,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) % Merge our modified new doc into the tree #full_doc_info{rev_tree=OldTree} = OldInfo, NewTree0 = couch_doc:to_path(NewDoc2), - case couch_key_tree:merge(OldTree, NewTree0, Limit) of + case couch_key_tree:merge(OldTree, NewTree0) of {NewTree1, new_leaf} -> % We changed the revision id so inform the caller send_result(Client, NewDoc, {ok, {OldPos+1, NewRevId}}), @@ -831,7 +832,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) send_result(Client, NewDoc, conflict), OldInfo end; -merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> +merge_rev_tree(OldInfo, NewDoc, Client, false) -> % We're attempting to merge a new revision into an % undeleted document. To not be a conflict we require % that the merge results in extending a branch. 
@@ -839,7 +840,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> OldTree = OldInfo#full_doc_info.rev_tree, NewTree0 = couch_doc:to_path(NewDoc), NewDeleted = NewDoc#doc.deleted, - case couch_key_tree:merge(OldTree, NewTree0, Limit) of + case couch_key_tree:merge(OldTree, NewTree0) of {NewTree, new_leaf} when not NewDeleted -> OldInfo#full_doc_info{ rev_tree = NewTree, @@ -857,17 +858,20 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> send_result(Client, NewDoc, conflict), OldInfo end; -merge_rev_tree(OldInfo, NewDoc, _Client, Limit, true) -> +merge_rev_tree(OldInfo, NewDoc, _Client, true) -> % We're merging in revisions without caring about % conflicts. Most likely this is a replication update. OldTree = OldInfo#full_doc_info.rev_tree, NewTree0 = couch_doc:to_path(NewDoc), - {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0, Limit), + {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0), OldInfo#full_doc_info{rev_tree = NewTree}. +stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) -> + Stemmed = couch_key_tree:stem(Tree, Limit), + Info#full_doc_info{rev_tree = Stemmed}. + stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) -> - [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} || - #full_doc_info{rev_tree=Tree}=Info <- DocInfos]. + lists:map(fun(FDI) -> stem_full_doc_info(FDI, Limit) end, DocInfos). update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> #db{ |