summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2020-04-04 16:45:52 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2020-04-07 13:55:53 -0400
commitd6ec9935453c4f0fe26174a472cdf3e4cb9c5e60 (patch)
tree09f79be1b964ce566d47a4735dd754cb295a5de4
parent6c1d7a993571d9e0e2304b12bbeaf8abb146cb0e (diff)
downloadcouchdb-d6ec9935453c4f0fe26174a472cdf3e4cb9c5e60.tar.gz
Compress doc bodies and attachments
In CouchDB < 4.x we compressed document bodies by default, so enable it for 4.x as well. Use the basic term_to_binary compression mechanism for: document bodies, local document bodies, and attachments (but only if they have not already been compressed).
-rw-r--r--src/fabric/include/fabric2.hrl4
-rw-r--r--src/fabric/src/fabric2_db.erl3
-rw-r--r--src/fabric/src/fabric2_fdb.erl42
-rw-r--r--src/fabric/test/fabric2_doc_att_tests.erl52
4 files changed, 90 insertions, 11 deletions
diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl
index e12762260..587b4f888 100644
--- a/src/fabric/include/fabric2.hrl
+++ b/src/fabric/include/fabric2.hrl
@@ -55,6 +55,10 @@
-define(CURR_LDOC_FORMAT, 0).
+% 0 - Attachment storage version
+
+-define(CURR_ATT_STORAGE_VER, 0).
+
% Misc constants
-define(PDICT_DB_KEY, '$fabric_db_handle').
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index 3d6d9245e..9b9efdac2 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -913,7 +913,8 @@ read_attachment(Db, DocId, AttId) ->
write_attachment(Db, DocId, Att) ->
Data = couch_att:fetch(data, Att),
- {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data),
+ Encoding = couch_att:fetch(encoding, Att),
+ {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data, Encoding),
couch_att:store(data, {loc, Db, DocId, AttId}, Att).
diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl
index 430693329..d96c3ae60 100644
--- a/src/fabric/src/fabric2_fdb.erl
+++ b/src/fabric/src/fabric2_fdb.erl
@@ -57,7 +57,7 @@
write_local_doc/2,
read_attachment/3,
- write_attachment/3,
+ write_attachment/4,
get_last_change/1,
@@ -971,26 +971,53 @@ read_attachment(#{} = Db, DocId, AttId) ->
} = ensure_current(Db),
AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix),
- case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of
+ Data = case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of
not_found ->
throw({not_found, missing});
KVs ->
Vs = [V || {_K, V} <- KVs],
iolist_to_binary(Vs)
+ end,
+
+ IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
+ case erlfdb:wait(erlfdb:get(Tx, IdKey)) of
+ <<>> ->
+ Data; % Old format, before CURR_ATT_STORAGE_VER = 0
+ <<_/binary>> = InfoBin ->
+ {?CURR_ATT_STORAGE_VER, Compressed} = erlfdb_tuple:unpack(InfoBin),
+ case Compressed of
+ true -> binary_to_term(Data, [safe]);
+ false -> Data
+ end
end.
-write_attachment(#{} = Db, DocId, Data) when is_binary(Data) ->
+write_attachment(#{} = Db, DocId, Data, Encoding)
+ when is_binary(Data), is_atom(Encoding) ->
#{
tx := Tx,
db_prefix := DbPrefix
} = ensure_current(Db),
AttId = fabric2_util:uuid(),
- Chunks = chunkify_binary(Data),
+
+ {Data1, Compressed} = case Encoding of
+ gzip ->
+ {Data, false};
+ _ ->
+ Opts = [{minor_version, 1}, {compressed, 6}],
+ CompressedData = term_to_binary(Data, Opts),
+ case size(CompressedData) < size(Data) of
+ true -> {CompressedData, true};
+ false -> {Data, false}
+ end
+ end,
IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
- ok = erlfdb:set(Tx, IdKey, <<>>),
+ InfoVal = erlfdb_tuple:pack({?CURR_ATT_STORAGE_VER, Compressed}),
+ ok = erlfdb:set(Tx, IdKey, InfoVal),
+
+ Chunks = chunkify_binary(Data1),
lists:foldl(fun(Chunk, ChunkId) ->
AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix),
@@ -1474,7 +1501,8 @@ doc_to_fdb(Db, #doc{} = Doc) ->
DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts),
- Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]),
+ Opts = [{minor_version, 1}, {compressed, 6}],
+ Value = term_to_binary({Body, DiskAtts, Deleted}, Opts),
Chunks = chunkify_binary(Value),
{Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
@@ -1526,7 +1554,7 @@ local_doc_to_fdb(Db, #doc{} = Doc) ->
_ when is_binary(Rev) -> Rev
end,
- BVal = term_to_binary(Body, [{minor_version, 1}]),
+ BVal = term_to_binary(Body, [{minor_version, 1}, {compressed, 6}]),
{Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix),
{{K, Chunk}, ChunkId + 1}
diff --git a/src/fabric/test/fabric2_doc_att_tests.erl b/src/fabric/test/fabric2_doc_att_tests.erl
index ac531e913..5d28b6da0 100644
--- a/src/fabric/test/fabric2_doc_att_tests.erl
+++ b/src/fabric/test/fabric2_doc_att_tests.erl
@@ -29,6 +29,7 @@ doc_crud_test_() ->
fun cleanup/1,
with([
?TDEF(create_att),
+ ?TDEF(create_att_already_compressed),
?TDEF(delete_att),
?TDEF(multiple_atts),
?TDEF(delete_one_att),
@@ -84,7 +85,48 @@ create_att({Db, _}) ->
IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)),
AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)),
- ?assertEqual(<<>>, IdVal),
+ % <<"foobar">> is too small to shrink under term_to_binary's
+ % external format, so it is stored uncompressed (flag = false)
+ ?assertEqual(erlfdb_tuple:pack({0, false}), IdVal),
+ ?assertMatch([{_, <<"foobar">>}], AttVals)
+ end).
+
+
+create_att_already_compressed({Db, _}) ->
+ DocId = fabric2_util:uuid(),
+ Att1 = couch_att:new([
+ {name, <<"foo.txt">>},
+ {type, <<"application/octet-stream">>},
+ {att_len, 6},
+ {data, <<"foobar">>},
+ {encoding, gzip},
+ {md5, <<>>}
+ ]),
+ Doc1 = #doc{
+ id = DocId,
+ atts = [Att1]
+ },
+ {ok, _} = fabric2_db:update_doc(Db, Doc1),
+ {ok, Doc2} = fabric2_db:open_doc(Db, DocId),
+ #doc{
+ atts = [Att2]
+ } = Doc2,
+ {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2),
+ AttData = fabric2_db:read_attachment(Db, DocId, AttId),
+ ?assertEqual(<<"foobar">>, AttData),
+
+ % Check that the raw keys exist
+ #{
+ db_prefix := DbPrefix
+ } = Db,
+ IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
+ AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix),
+
+ fabric2_fdb:transactional(fun(Tx) ->
+ IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)),
+ AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)),
+
+ ?assertEqual(erlfdb_tuple:pack({0, false}), IdVal),
?assertMatch([{_, <<"foobar">>}], AttVals)
end).
@@ -175,7 +217,7 @@ large_att({Db, _}) ->
AttData = iolist_to_binary([
<<"foobar">> || _ <- lists:seq(1, 60000)
]),
- Att1 = mk_att(<<"long.txt">>, AttData),
+ Att1 = mk_att(<<"long.txt">>, AttData, gzip),
{ok, _} = create_doc(Db, DocId, [Att1]),
?assertEqual(#{<<"long.txt">> => AttData}, read_atts(Db, DocId)),
@@ -204,12 +246,16 @@ att_on_conflict_isolation({Db, _}) ->
mk_att(Name, Data) ->
+ mk_att(Name, Data, identity).
+
+
+mk_att(Name, Data, Encoding) ->
couch_att:new([
{name, Name},
{type, <<"application/octet-stream">>},
{att_len, size(Data)},
{data, Data},
- {encoding, identity},
+ {encoding, Encoding},
{md5, <<>>}
]).