diff options
author | Paul J. Davis <paul.joseph.davis@gmail.com> | 2017-09-08 09:40:52 -0500 |
---|---|---|
committer | Paul J. Davis <paul.joseph.davis@gmail.com> | 2020-04-14 11:26:13 -0500 |
commit | 1aef628628f52f1fc12729fcdb326b7897147257 (patch) | |
tree | 0e0365766927a7ffadb27ba9633d3d8748202556 | |
parent | 2c9477ec87b9fff2672d1891797a994a29fe17f5 (diff) | |
download | couchdb-1aef628628f52f1fc12729fcdb326b7897147257.tar.gz |
Optimize compactor to use multi-IO API
This updates couch_db_updater to use the new multi-IO API functions
(append_terms/pread_terms) in couch_file. This optimization benefits us
by no longer requiring the `couch_emsort:merge/1` step to copy
`#full_doc_info{}` records multiple times while also not being penalized
by signficantly increasing the number of calls through couch_file APIs.
-rw-r--r-- | src/couch/src/couch_bt_engine_compactor.erl | 54 |
1 files changed, 33 insertions, 21 deletions
diff --git a/src/couch/src/couch_bt_engine_compactor.erl b/src/couch/src/couch_bt_engine_compactor.erl index 50b3981d5..92ccea575 100644 --- a/src/couch/src/couch_bt_engine_compactor.erl +++ b/src/couch/src/couch_bt_engine_compactor.erl @@ -36,11 +36,12 @@ }). -record(merge_st, { + src_fd, id_tree, seq_tree, curr, rem_seqs, - infos + locs }). @@ -441,10 +442,16 @@ copy_docs(St, #st{} = NewSt, MixedInfos, Retry) -> {ok, SeqTree} = couch_btree:add_remove( NewSt#st.seq_tree, NewInfos, RemoveSeqs), - FDIKVs = lists:map(fun(#full_doc_info{id=Id, update_seq=Seq}=FDI) -> - {{Id, Seq}, FDI} - end, NewInfos), - {ok, IdEms} = couch_emsort:add(NewSt#st.id_tree, FDIKVs), + EMSortFd = couch_emsort:get_fd(NewSt#st.id_tree), + {ok, LocSizes} = couch_file:append_terms(EMSortFd, NewInfos), + EMSortEntries = lists:zipwith(fun(FDI, {Loc, _}) -> + #full_doc_info{ + id = Id, + update_seq = Seq + } = FDI, + {{Id, Seq}, Loc} + end, NewInfos, LocSizes), + {ok, IdEms} = couch_emsort:add(NewSt#st.id_tree, EMSortEntries), update_compact_task(length(NewInfos)), NewSt#st{id_tree=IdEms, seq_tree=SeqTree}. @@ -510,6 +517,7 @@ copy_meta_data(#comp_st{new_st = St} = CompSt) -> header = Header, id_tree = Src } = St, + SrcFd = couch_emsort:get_fd(Src), DstState = couch_bt_engine_header:id_tree_state(Header), {ok, IdTree0} = couch_btree:open(DstState, Fd, [ {split, fun couch_bt_engine:id_tree_split/1}, @@ -518,14 +526,16 @@ copy_meta_data(#comp_st{new_st = St} = CompSt) -> ]), {ok, Iter} = couch_emsort:iter(Src), Acc0 = #merge_st{ + src_fd=SrcFd, id_tree=IdTree0, seq_tree=St#st.seq_tree, rem_seqs=[], - infos=[] + locs=[] }, ?COMP_EVENT(md_copy_init), Acc = merge_docids(Iter, Acc0), - {ok, IdTree} = couch_btree:add(Acc#merge_st.id_tree, Acc#merge_st.infos), + {ok, Infos} = couch_file:pread_terms(SrcFd, Acc#merge_st.locs), + {ok, IdTree} = couch_btree:add(Acc#merge_st.id_tree, Infos), {ok, SeqTree} = couch_btree:add_remove( Acc#merge_st.seq_tree, [], Acc#merge_st.rem_seqs ), @@ -627,34 +637,36 @@ merge_lookups([FDI | RestInfos], Lookups) -> [FDI | merge_lookups(RestInfos, Lookups)]. -merge_docids(Iter, #merge_st{infos=Infos}=Acc) when length(Infos) > 1000 -> +merge_docids(Iter, #merge_st{locs=Locs}=Acc) when length(Locs) > 1000 -> #merge_st{ + src_fd=SrcFd, id_tree=IdTree0, seq_tree=SeqTree0, rem_seqs=RemSeqs } = Acc, + {ok, Infos} = couch_file:pread_terms(SrcFd, Locs), {ok, IdTree1} = couch_btree:add(IdTree0, Infos), {ok, SeqTree1} = couch_btree:add_remove(SeqTree0, [], RemSeqs), Acc1 = Acc#merge_st{ id_tree=IdTree1, seq_tree=SeqTree1, rem_seqs=[], - infos=[] + locs=[] }, merge_docids(Iter, Acc1); merge_docids(Iter, #merge_st{curr=Curr}=Acc) -> case next_info(Iter, Curr, []) of - {NextIter, NewCurr, FDI, Seqs} -> + {NextIter, NewCurr, Loc, Seqs} -> Acc1 = Acc#merge_st{ - infos = [FDI | Acc#merge_st.infos], + locs = [Loc | Acc#merge_st.locs], rem_seqs = Seqs ++ Acc#merge_st.rem_seqs, curr = NewCurr }, ?COMP_EVENT(md_copy_row), merge_docids(NextIter, Acc1); - {finished, FDI, Seqs} -> + {finished, Loc, Seqs} -> Acc#merge_st{ - infos = [FDI | Acc#merge_st.infos], + locs = [Loc | Acc#merge_st.locs], rem_seqs = Seqs ++ Acc#merge_st.rem_seqs, curr = undefined }; @@ -665,19 +677,19 @@ merge_docids(Iter, #merge_st{curr=Curr}=Acc) -> next_info(Iter, undefined, []) -> case couch_emsort:next(Iter) of - {ok, {{Id, Seq}, FDI}, NextIter} -> - next_info(NextIter, {Id, Seq, FDI}, []); + {ok, {{Id, Seq}, Loc}, NextIter} -> + next_info(NextIter, {Id, Seq, Loc}, []); finished -> empty end; -next_info(Iter, {Id, Seq, FDI}, Seqs) -> +next_info(Iter, {Id, Seq, Loc}, Seqs) -> case couch_emsort:next(Iter) of - {ok, {{Id, NSeq}, NFDI}, NextIter} -> - next_info(NextIter, {Id, NSeq, NFDI}, [Seq | Seqs]); - {ok, {{NId, NSeq}, NFDI}, NextIter} -> - {NextIter, {NId, NSeq, NFDI}, FDI, Seqs}; + {ok, {{Id, NSeq}, NLoc}, NextIter} -> + next_info(NextIter, {Id, NSeq, NLoc}, [Seq | Seqs]); + {ok, {{NId, NSeq}, NLoc}, NextIter} -> + {NextIter, {NId, NSeq, NLoc}, Loc, Seqs}; finished -> - {finished, FDI, Seqs} + {finished, Loc, Seqs} end. |