summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2020-04-04 16:45:52 -0400
committerNick Vatamaniuc <vatamane@apache.org>2020-04-06 15:44:17 -0400
commita369c5285f6b9de91d723a962a907db27bae02a4 (patch)
treed4bdb9bc2ecd38dedc9b7c032b12bfd9a47a5e85
parent5652e72e43406b7e4b743ee3fe7e2570aec77e95 (diff)
downloadcouchdb-compress-doc-bodies-and-attachments.tar.gz
Compress doc bodies and attachmentscompress-doc-bodies-and-attachments
In CouchDB < 4.x we compressed document bodies by default, so enable it for 4.x as well. Use the basic term_to_binary compression mechanism for: document bodies, local document bodies, and attachments — the latter only if they have not already been compressed.
-rw-r--r--src/fabric/include/fabric2.hrl4
-rw-r--r--src/fabric/src/fabric2_db.erl3
-rw-r--r--src/fabric/src/fabric2_fdb.erl42
-rw-r--r--src/fabric/test/fabric2_doc_att_tests.erl51
4 files changed, 89 insertions, 11 deletions
diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl
index 0c0757567..99ac87497 100644
--- a/src/fabric/include/fabric2.hrl
+++ b/src/fabric/include/fabric2.hrl
@@ -54,6 +54,10 @@
-define(CURR_LDOC_FORMAT, 0).
+% 0 - Attachment storage version
+
+-define(CURR_ATT_STORAGE_VER, 0).
+
% Misc constants
-define(PDICT_DB_KEY, '$fabric_db_handle').
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index fb6ae5176..54c6b37de 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -809,7 +809,8 @@ read_attachment(Db, DocId, AttId) ->
write_attachment(Db, DocId, Att) ->
Data = couch_att:fetch(data, Att),
- {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data),
+ Encoding = couch_att:fetch(encoding, Att),
+ {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data, Encoding),
couch_att:store(data, {loc, Db, DocId, AttId}, Att).
diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl
index 2295a5648..5e5ba3e5f 100644
--- a/src/fabric/src/fabric2_fdb.erl
+++ b/src/fabric/src/fabric2_fdb.erl
@@ -54,7 +54,7 @@
write_local_doc/2,
read_attachment/3,
- write_attachment/3,
+ write_attachment/4,
get_last_change/1,
@@ -902,26 +902,53 @@ read_attachment(#{} = Db, DocId, AttId) ->
} = ensure_current(Db),
AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix),
- case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of
+ Data = case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of
not_found ->
throw({not_found, missing});
KVs ->
Vs = [V || {_K, V} <- KVs],
iolist_to_binary(Vs)
+ end,
+
+ IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
+ case erlfdb:wait(erlfdb:get(Tx, IdKey)) of
+ <<>> ->
+ Data; % Old format, before CURR_ATT_STORAGE_VER = 0
+ <<_/binary>> = InfoBin ->
+ {?CURR_ATT_STORAGE_VER, Compressed} = erlfdb_tuple:unpack(InfoBin),
+ case Compressed of
+ true -> binary_to_term(Data, [safe]);
+ false -> Data
+ end
end.
-write_attachment(#{} = Db, DocId, Data) when is_binary(Data) ->
+write_attachment(#{} = Db, DocId, Data, Encoding)
+ when is_binary(Data), is_atom(Encoding) ->
#{
tx := Tx,
db_prefix := DbPrefix
} = ensure_current(Db),
AttId = fabric2_util:uuid(),
- Chunks = chunkify_binary(Data),
+
+ {Data1, Compressed} = case Encoding of
+ gzip ->
+ {Data, false};
+ _ ->
+ Opts = [{minor_version, 1}, {compressed, 6}],
+ CompressedData = term_to_binary(Data, Opts),
+            case size(CompressedData) < size(Data) of
+ true -> {CompressedData, true};
+ false -> {Data, false}
+ end
+ end,
IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
- ok = erlfdb:set(Tx, IdKey, <<>>),
+ InfoVal = erlfdb_tuple:pack({?CURR_ATT_STORAGE_VER, Compressed}),
+ ok = erlfdb:set(Tx, IdKey, InfoVal),
+
+ Chunks = chunkify_binary(Data1),
lists:foldl(fun(Chunk, ChunkId) ->
AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix),
@@ -1366,7 +1393,8 @@ doc_to_fdb(Db, #doc{} = Doc) ->
DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts),
- Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]),
+ Opts = [{minor_version, 1}, {compressed, 6}],
+ Value = term_to_binary({Body, DiskAtts, Deleted}, Opts),
Chunks = chunkify_binary(Value),
{Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
@@ -1418,7 +1446,7 @@ local_doc_to_fdb(Db, #doc{} = Doc) ->
_ when is_binary(Rev) -> Rev
end,
- BVal = term_to_binary(Body, [{minor_version, 1}]),
+ BVal = term_to_binary(Body, [{minor_version, 1}, {compressed, 6}]),
{Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix),
{{K, Chunk}, ChunkId + 1}
diff --git a/src/fabric/test/fabric2_doc_att_tests.erl b/src/fabric/test/fabric2_doc_att_tests.erl
index ac531e913..fc7bbccbc 100644
--- a/src/fabric/test/fabric2_doc_att_tests.erl
+++ b/src/fabric/test/fabric2_doc_att_tests.erl
@@ -29,6 +29,7 @@ doc_crud_test_() ->
fun cleanup/1,
with([
?TDEF(create_att),
+ ?TDEF(create_att_already_compressed),
?TDEF(delete_att),
?TDEF(multiple_atts),
?TDEF(delete_one_att),
@@ -84,7 +85,47 @@ create_att({Db, _}) ->
IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)),
AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)),
- ?assertEqual(<<>>, IdVal),
+ ?assertEqual(erlfdb_tuple:pack({0, true}), IdVal),
+ Opts = [{minor_version, 1}, {compressed, 6}],
+ Expect = term_to_binary(<<"foobar">>, Opts),
+ ?assertMatch([{_, Expect}], AttVals)
+ end).
+
+create_att_already_compressed({Db, _}) ->
+ DocId = fabric2_util:uuid(),
+ Att1 = couch_att:new([
+ {name, <<"foo.txt">>},
+ {type, <<"application/octet-stream">>},
+ {att_len, 6},
+ {data, <<"foobar">>},
+ {encoding, gzip},
+ {md5, <<>>}
+ ]),
+ Doc1 = #doc{
+ id = DocId,
+ atts = [Att1]
+ },
+ {ok, _} = fabric2_db:update_doc(Db, Doc1),
+ {ok, Doc2} = fabric2_db:open_doc(Db, DocId),
+ #doc{
+ atts = [Att2]
+ } = Doc2,
+ {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2),
+ AttData = fabric2_db:read_attachment(Db, DocId, AttId),
+ ?assertEqual(<<"foobar">>, AttData),
+
+ % Check that the raw keys exist
+ #{
+ db_prefix := DbPrefix
+ } = Db,
+ IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
+ AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix),
+
+ fabric2_fdb:transactional(fun(Tx) ->
+ IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)),
+ AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)),
+
+ ?assertEqual(erlfdb_tuple:pack({0, false}), IdVal),
?assertMatch([{_, <<"foobar">>}], AttVals)
end).
@@ -175,7 +216,7 @@ large_att({Db, _}) ->
AttData = iolist_to_binary([
<<"foobar">> || _ <- lists:seq(1, 60000)
]),
- Att1 = mk_att(<<"long.txt">>, AttData),
+ Att1 = mk_att(<<"long.txt">>, AttData, gzip),
{ok, _} = create_doc(Db, DocId, [Att1]),
?assertEqual(#{<<"long.txt">> => AttData}, read_atts(Db, DocId)),
@@ -204,12 +245,16 @@ att_on_conflict_isolation({Db, _}) ->
mk_att(Name, Data) ->
+ mk_att(Name, Data, identity).
+
+
+mk_att(Name, Data, Encoding) ->
couch_att:new([
{name, Name},
{type, <<"application/octet-stream">>},
{att_len, size(Data)},
{data, Data},
- {encoding, identity},
+ {encoding, Encoding},
{md5, <<>>}
]).