diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2017-09-18 17:59:55 -0400 |
---|---|---|
committer | Nick Vatamaniuc <vatamane@apache.org> | 2017-09-18 21:19:18 -0400 |
commit | b9206094f32d60a9cbe000e443909726e8e93715 (patch) | |
tree | 15f25ce3aadd0ee3b52fb3847c821e535b2c7f9d | |
parent | a333595ee66e122ea6fa76bfb1f72d76d609ac60 (diff) | |
download | couchdb-compactor-optimize-emsort-skip-compression.tar.gz |
Skip compression for EMSort data (branch: compactor-optimize-emsort-skip-compression)
EMSort data is either FDIs or {{Id, Seq}, Loc} KVs and using the default snappy
compression on it is wasteful. snappy is a NIF, so no matter how small the data,
there is a cost associated with jumping to C and back.
Since snappy is always the default for append_term(s)/2, even if the user explicitly
sets the default compression to something else in the config files, we have to
explicitly pass the option to skip compression in each function call.
-rw-r--r-- | src/couch/src/couch_db_updater.erl | 3 | ||||
-rw-r--r-- | src/couch/src/couch_emsort.erl | 9 |
2 files changed, 8 insertions, 4 deletions
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index 4786ee70f..f8cf764fc 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -1342,7 +1342,8 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) -> NewDb#db.seq_tree, NewInfos, RemoveSeqs), EMSortFd = couch_emsort:get_fd(NewDb#db.id_tree), - {ok, LocSizes} = couch_file:append_terms(EMSortFd, NewInfos), + EMOpts = [{compression, none}], + {ok, LocSizes} = couch_file:append_terms(EMSortFd, NewInfos, EMOpts), EMSortEntries = lists:zipwith(fun(FDI, {Loc, _}) -> #full_doc_info{ id = Id, diff --git a/src/couch/src/couch_emsort.erl b/src/couch/src/couch_emsort.erl index 2a25a2322..80edcbd2c 100644 --- a/src/couch/src/couch_emsort.erl +++ b/src/couch/src/couch_emsort.erl @@ -216,7 +216,8 @@ write_kvs(Ems, KVs) -> lists:foldr(fun(KV, Acc) -> append_item(Ems, Acc, KV, Ems#ems.chain_chunk) end, {[], nil}, lists:sort(KVs)), - {ok, Final, _} = couch_file:append_term(Ems#ems.fd, {LastKVs, LastPos}), + {ok, Final, _} = couch_file:append_term(Ems#ems.fd, {LastKVs, LastPos}, + [{compression, none}]), Final. @@ -263,7 +264,8 @@ merge_chains(Ems, Choose, BB) -> merge_chains(Ems, _Choose, [], ChainAcc) -> - {ok, CPos, _} = couch_file:append_term(Ems#ems.fd, ChainAcc), + {ok, CPos, _} = couch_file:append_term(Ems#ems.fd, ChainAcc, + [{compression, none}]), CPos; merge_chains(#ems{chain_chunk=CC}=Ems, Choose, Chains, Acc) -> {KV, RestChains} = choose_kv(Choose, Ems, Chains), @@ -311,7 +313,8 @@ ins_big_chain(Rest, Chain, Acc) -> append_item(Ems, {List, Prev}, Pos, Size) when length(List) >= Size -> - {ok, PrevList, _} = couch_file:append_term(Ems#ems.fd, {List, Prev}), + {ok, PrevList, _} = couch_file:append_term(Ems#ems.fd, {List, Prev}, + [{compression, none}]), {[Pos], PrevList}; append_item(_Ems, {List, Prev}, Pos, _Size) -> {[Pos | List], Prev}. |