summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2017-09-22 01:02:47 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2017-09-25 13:19:10 -0400
commit8d1c7043731fbaa5f4f93243df5144416c946604 (patch)
tree6a726da2c6fe8c4bf717953504aa717f5c13a542
parent00df0def7fb8107717ed2274b88f855d18402a72 (diff)
downloadcouchdb-8d1c7043731fbaa5f4f93243df5144416c946604.tar.gz
Avoid decompressing just to calculate external size
Use snappy's `uncompressed_length` and the external binary format's spec to get the uncompressed size without decompressing. See http://erlang.org/doc/apps/erts/erl_ext_dist.html. `erlang:external_size` is a function provided since R16B3, so use it without the `try ... catch` fallback. Also make sure to use `[{minor_version, 1}]` to match what the `?term_to_bin` macro does. Fixes #835
-rw-r--r--src/couch/include/couch_db.hrl7
-rw-r--r--src/couch/src/couch_compress.erl14
-rw-r--r--src/couch/src/couch_db_updater.erl9
-rw-r--r--src/couch/test/couch_compress_tests.erl11
-rw-r--r--src/couch/test/couchdb_file_compression_tests.erl2
5 files changed, 31 insertions, 12 deletions
diff --git a/src/couch/include/couch_db.hrl b/src/couch/include/couch_db.hrl
index 7049c6e5f..17ef4c976 100644
--- a/src/couch/include/couch_db.hrl
+++ b/src/couch/include/couch_db.hrl
@@ -30,12 +30,7 @@
-define(i2b(V), couch_util:integer_to_boolean(V)).
-define(b2i(V), couch_util:boolean_to_integer(V)).
-define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
--define(term_size(T),
- try
- erlang:external_size(T)
- catch _:_ ->
- byte_size(?term_to_bin(T))
- end).
+-define(term_size(T), erlang:external_size(T, [{minor_version, 1}])).
-define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>).
diff --git a/src/couch/src/couch_compress.erl b/src/couch/src/couch_compress.erl
index 71588b228..cfcc2a481 100644
--- a/src/couch/src/couch_compress.erl
+++ b/src/couch/src/couch_compress.erl
@@ -14,6 +14,7 @@
-export([compress/2, decompress/1, is_compressed/2]).
-export([get_compression_method/0]).
+-export([uncompressed_size/1]).
-include_lib("couch/include/couch_db.hrl").
@@ -83,3 +84,16 @@ is_compressed(Term, _Method) when not is_binary(Term) ->
is_compressed(_, _) ->
error(invalid_compression).
+
+uncompressed_size(<<?SNAPPY_PREFIX, Rest/binary>>) -> % size of the payload once uncompressed, without decompressing it
+ {ok, Size} = snappy:uncompressed_length(Rest), % any reply other than {ok, Size} crashes with badmatch
+ Size;
+uncompressed_size(<<?COMPRESSED_TERM_PREFIX, Size:32, _/binary>> = _Bin) ->
+ % See http://erlang.org/doc/apps/erts/erl_ext_dist.html
+ % Size is the 32-bit length field read right after the compressed-term prefix.
+ % The uncompressed binary would be <<131, Rest/binary>>, so add 1 for the 131.
+ Size + 1;
+uncompressed_size(<<?TERM_PREFIX, _/binary>> = Bin) -> % no length field to read; the binary's own size is returned
+ byte_size(Bin);
+uncompressed_size(_) -> % anything not matched by the clauses above
+ error(invalid_compression).
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 78e0b8c19..f0b65057a 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -1079,14 +1079,13 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
{Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
% In the future, we should figure out how to do this for
% upgrade purposes.
- EJsonBody = case is_binary(Body) of
+ ExternalSize = case is_binary(Body) of
true ->
- couch_compress:decompress(Body);
+ couch_compress:uncompressed_size(Body);
false ->
- Body
+ ?term_size(Body)
end,
SummaryChunk = make_doc_summary(NewDb, {Body, AttInfos}),
- ExternalSize = ?term_size(EJsonBody),
{ok, Pos, SummarySize} = couch_file:append_raw_chunk(
DestFd, SummaryChunk),
AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
@@ -1472,7 +1471,7 @@ get_meta_body_size(Meta, Summary) ->
{ejson_size, ExternalSize} ->
ExternalSize;
false ->
- ?term_size(couch_compress:decompress(Summary))
+ couch_compress:uncompressed_size(Summary)
end.
diff --git a/src/couch/test/couch_compress_tests.erl b/src/couch/test/couch_compress_tests.erl
index 6d6e6a792..addb9a0e2 100644
--- a/src/couch/test/couch_compress_tests.erl
+++ b/src/couch/test/couch_compress_tests.erl
@@ -72,3 +72,14 @@ is_compressed_test_() ->
?_assertError(invalid_compression,
couch_compress:is_compressed(?CORRUPT, snappy))
].
+
+uncompressed_size_test_() -> % EUnit generator for couch_compress:uncompressed_size/1
+ [
+ ?_assertEqual(49, couch_compress:uncompressed_size(?NONE)), % NOTE(review): fixture macros are defined outside this hunk
+ ?_assertEqual(49, couch_compress:uncompressed_size(?DEFLATE)),
+ ?_assertEqual(49, couch_compress:uncompressed_size(?SNAPPY)), % all three fixtures must report the same size, 49
+ ?_assertEqual(5, couch_compress:uncompressed_size(
+ couch_compress:compress(x, {deflate, 9}))), % presumably x is too small to deflate and stays a plain term - confirm
+ ?_assertError(invalid_compression,
+ couch_compress:uncompressed_size(?CORRUPT)) % unrecognized input must raise invalid_compression
+ ].
diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl
index 09fead582..8f0fe5bf1 100644
--- a/src/couch/test/couchdb_file_compression_tests.erl
+++ b/src/couch/test/couchdb_file_compression_tests.erl
@@ -157,7 +157,7 @@ compare_compression_methods(DbName) ->
?assert(DbSizeDeflate1 > DbSizeDeflate9),
?assert(ViewSizeDeflate1 > ViewSizeDeflate9),
- ?assert(ExternalSizePreCompact =:= ExternalSizeNone),
+ ?assert(ExternalSizePreCompact >= ExternalSizeNone),
?assert(ExternalSizeNone =:= ExternalSizeSnappy),
?assert(ExternalSizeNone =:= ExternalSizeDeflate9),
?assert(ViewExternalSizeNone =:= ViewExternalSizeSnappy),