diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2017-09-22 01:02:47 -0400 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2017-09-25 13:19:10 -0400 |
commit | 8d1c7043731fbaa5f4f93243df5144416c946604 (patch) | |
tree | 6a726da2c6fe8c4bf717953504aa717f5c13a542 | |
parent | 00df0def7fb8107717ed2274b88f855d18402a72 (diff) | |
download | couchdb-8d1c7043731fbaa5f4f93243df5144416c946604.tar.gz |
Avoid decompressing just to calculate external size
Use snappy's `uncompressed_length` and the uncompressed-size field defined by the
external term format spec (for deflate-compressed terms) to get the uncompressed size.
http://erlang.org/doc/apps/erts/erl_ext_dist.html
`erlang:external_size` has been provided since R16B3, so use it without the
`try ... catch` fallback. Also make sure to pass `[{minor_version, 1}]` to match
what the `?term_to_bin` macro does.
Fixes #835
-rw-r--r-- | src/couch/include/couch_db.hrl | 7 | ||||
-rw-r--r-- | src/couch/src/couch_compress.erl | 14 | ||||
-rw-r--r-- | src/couch/src/couch_db_updater.erl | 9 | ||||
-rw-r--r-- | src/couch/test/couch_compress_tests.erl | 11 | ||||
-rw-r--r-- | src/couch/test/couchdb_file_compression_tests.erl | 2 |
5 files changed, 31 insertions, 12 deletions
diff --git a/src/couch/include/couch_db.hrl b/src/couch/include/couch_db.hrl index 7049c6e5f..17ef4c976 100644 --- a/src/couch/include/couch_db.hrl +++ b/src/couch/include/couch_db.hrl @@ -30,12 +30,7 @@ -define(i2b(V), couch_util:integer_to_boolean(V)). -define(b2i(V), couch_util:boolean_to_integer(V)). -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])). --define(term_size(T), - try - erlang:external_size(T) - catch _:_ -> - byte_size(?term_to_bin(T)) - end). +-define(term_size(T), erlang:external_size(T, [{minor_version, 1}])). -define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>). diff --git a/src/couch/src/couch_compress.erl b/src/couch/src/couch_compress.erl index 71588b228..cfcc2a481 100644 --- a/src/couch/src/couch_compress.erl +++ b/src/couch/src/couch_compress.erl @@ -14,6 +14,7 @@ -export([compress/2, decompress/1, is_compressed/2]). -export([get_compression_method/0]). +-export([uncompressed_size/1]). -include_lib("couch/include/couch_db.hrl"). @@ -83,3 +84,16 @@ is_compressed(Term, _Method) when not is_binary(Term) -> is_compressed(_, _) -> error(invalid_compression). + +uncompressed_size(<<?SNAPPY_PREFIX, Rest/binary>>) -> + {ok, Size} = snappy:uncompressed_length(Rest), + Size; +uncompressed_size(<<?COMPRESSED_TERM_PREFIX, Size:32, _/binary>> = _Bin) -> + % See http://erlang.org/doc/apps/erts/erl_ext_dist.html + % The uncompressed binary would be encoded with <<131, Rest/binary>> + % so need to add 1 for 131 + Size + 1; +uncompressed_size(<<?TERM_PREFIX, _/binary>> = Bin) -> + byte_size(Bin); +uncompressed_size(_) -> + error(invalid_compression). 
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index 78e0b8c19..f0b65057a 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -1079,14 +1079,13 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) -> {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd), % In the future, we should figure out how to do this for % upgrade purposes. - EJsonBody = case is_binary(Body) of + ExternalSize = case is_binary(Body) of true -> - couch_compress:decompress(Body); + couch_compress:uncompressed_size(Body); false -> - Body + ?term_size(Body) end, SummaryChunk = make_doc_summary(NewDb, {Body, AttInfos}), - ExternalSize = ?term_size(EJsonBody), {ok, Pos, SummarySize} = couch_file:append_raw_chunk( DestFd, SummaryChunk), AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos], @@ -1472,7 +1471,7 @@ get_meta_body_size(Meta, Summary) -> {ejson_size, ExternalSize} -> ExternalSize; false -> - ?term_size(couch_compress:decompress(Summary)) + couch_compress:uncompressed_size(Summary) end. diff --git a/src/couch/test/couch_compress_tests.erl b/src/couch/test/couch_compress_tests.erl index 6d6e6a792..addb9a0e2 100644 --- a/src/couch/test/couch_compress_tests.erl +++ b/src/couch/test/couch_compress_tests.erl @@ -72,3 +72,14 @@ is_compressed_test_() -> ?_assertError(invalid_compression, couch_compress:is_compressed(?CORRUPT, snappy)) ]. + +uncompressed_size_test_() -> + [ + ?_assertEqual(49, couch_compress:uncompressed_size(?NONE)), + ?_assertEqual(49, couch_compress:uncompressed_size(?DEFLATE)), + ?_assertEqual(49, couch_compress:uncompressed_size(?SNAPPY)), + ?_assertEqual(5, couch_compress:uncompressed_size( + couch_compress:compress(x, {deflate, 9}))), + ?_assertError(invalid_compression, + couch_compress:uncompressed_size(?CORRUPT)) + ]. 
diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl index 09fead582..8f0fe5bf1 100644 --- a/src/couch/test/couchdb_file_compression_tests.erl +++ b/src/couch/test/couchdb_file_compression_tests.erl @@ -157,7 +157,7 @@ compare_compression_methods(DbName) -> ?assert(DbSizeDeflate1 > DbSizeDeflate9), ?assert(ViewSizeDeflate1 > ViewSizeDeflate9), - ?assert(ExternalSizePreCompact =:= ExternalSizeNone), + ?assert(ExternalSizePreCompact >= ExternalSizeNone), ?assert(ExternalSizeNone =:= ExternalSizeSnappy), ?assert(ExternalSizeNone =:= ExternalSizeDeflate9), ?assert(ViewExternalSizeNone =:= ViewExternalSizeSnappy), |