diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2020-04-04 16:45:52 -0400 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2020-04-07 13:55:53 -0400 |
commit | d6ec9935453c4f0fe26174a472cdf3e4cb9c5e60 (patch) | |
tree | 09f79be1b964ce566d47a4735dd754cb295a5de4 | |
parent | 6c1d7a993571d9e0e2304b12bbeaf8abb146cb0e (diff) | |
download | couchdb-d6ec9935453c4f0fe26174a472cdf3e4cb9c5e60.tar.gz |
Compress doc bodies and attachments
In CouchDB versions before 4.x we compressed document bodies by default, so
enable compression for 4.x as well.
Use the basic term_to_binary compression mechanism for:
- Document bodies
- Local document bodies
- Attachments, but only if they have not already been compressed.
-rw-r--r-- | src/fabric/include/fabric2.hrl | 4 | ||||
-rw-r--r-- | src/fabric/src/fabric2_db.erl | 3 | ||||
-rw-r--r-- | src/fabric/src/fabric2_fdb.erl | 42 | ||||
-rw-r--r-- | src/fabric/test/fabric2_doc_att_tests.erl | 52 |
4 files changed, 90 insertions, 11 deletions
diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index e12762260..587b4f888 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -55,6 +55,10 @@ -define(CURR_LDOC_FORMAT, 0). +% 0 - Attachment storage version + +-define(CURR_ATT_STORAGE_VER, 0). + % Misc constants -define(PDICT_DB_KEY, '$fabric_db_handle'). diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 3d6d9245e..9b9efdac2 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -913,7 +913,8 @@ read_attachment(Db, DocId, AttId) -> write_attachment(Db, DocId, Att) -> Data = couch_att:fetch(data, Att), - {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data), + Encoding = couch_att:fetch(encoding, Att), + {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data, Encoding), couch_att:store(data, {loc, Db, DocId, AttId}, Att). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 430693329..d96c3ae60 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -57,7 +57,7 @@ write_local_doc/2, read_attachment/3, - write_attachment/3, + write_attachment/4, get_last_change/1, @@ -971,26 +971,53 @@ read_attachment(#{} = Db, DocId, AttId) -> } = ensure_current(Db), AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), - case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of + Data = case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of not_found -> throw({not_found, missing}); KVs -> Vs = [V || {_K, V} <- KVs], iolist_to_binary(Vs) + end, + + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, IdKey)) of + <<>> -> + Data; % Old format, before CURR_ATT_STORAGE_VER = 0 + <<_/binary>> = InfoBin -> + {?CURR_ATT_STORAGE_VER, Compressed} = erlfdb_tuple:unpack(InfoBin), + case Compressed of + true -> binary_to_term(Data, [safe]); + false -> Data + end end. 
-write_attachment(#{} = Db, DocId, Data) when is_binary(Data) -> +write_attachment(#{} = Db, DocId, Data, Encoding) + when is_binary(Data), is_atom(Encoding) -> #{ tx := Tx, db_prefix := DbPrefix } = ensure_current(Db), AttId = fabric2_util:uuid(), - Chunks = chunkify_binary(Data), + + {Data1, Compressed} = case Encoding of + gzip -> + {Data, false}; + _ -> + Opts = [{minor_version, 1}, {compressed, 6}], + CompressedData = term_to_binary(Data, Opts), + case size(CompressedData) < Data of + true -> {CompressedData, true}; + false -> {Data, false} + end + end, IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), - ok = erlfdb:set(Tx, IdKey, <<>>), + InfoVal = erlfdb_tuple:pack({?CURR_ATT_STORAGE_VER, Compressed}), + ok = erlfdb:set(Tx, IdKey, InfoVal), + + Chunks = chunkify_binary(Data1), lists:foldl(fun(Chunk, ChunkId) -> AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix), @@ -1474,7 +1501,8 @@ doc_to_fdb(Db, #doc{} = Doc) -> DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts), - Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]), + Opts = [{minor_version, 1}, {compressed, 6}], + Value = term_to_binary({Body, DiskAtts, Deleted}, Opts), Chunks = chunkify_binary(Value), {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> @@ -1526,7 +1554,7 @@ local_doc_to_fdb(Db, #doc{} = Doc) -> _ when is_binary(Rev) -> Rev end, - BVal = term_to_binary(Body, [{minor_version, 1}]), + BVal = term_to_binary(Body, [{minor_version, 1}, {compressed, 6}]), {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix), {{K, Chunk}, ChunkId + 1} diff --git a/src/fabric/test/fabric2_doc_att_tests.erl b/src/fabric/test/fabric2_doc_att_tests.erl index ac531e913..5d28b6da0 100644 --- a/src/fabric/test/fabric2_doc_att_tests.erl +++ b/src/fabric/test/fabric2_doc_att_tests.erl @@ -29,6 +29,7 @@ doc_crud_test_() -> fun cleanup/1, with([ ?TDEF(create_att), + 
?TDEF(create_att_already_compressed), ?TDEF(delete_att), ?TDEF(multiple_atts), ?TDEF(delete_one_att), @@ -84,7 +85,48 @@ create_att({Db, _}) -> IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), - ?assertEqual(<<>>, IdVal), + ?assertEqual(erlfdb_tuple:pack({0, true}), IdVal), + Opts = [{minor_version, 1}, {compressed, 6}], + Expect = term_to_binary(<<"foobar">>, Opts), + ?assertMatch([{_, Expect}], AttVals) + end). + + +create_att_already_compressed({Db, _}) -> + DocId = fabric2_util:uuid(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, gzip}, + {md5, <<>>} + ]), + Doc1 = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + {ok, Doc2} = fabric2_db:open_doc(Db, DocId), + #doc{ + atts = [Att2] + } = Doc2, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + AttData = fabric2_db:read_attachment(Db, DocId, AttId), + ?assertEqual(<<"foobar">>, AttData), + + % Check that the raw keys exist + #{ + db_prefix := DbPrefix + } = Db, + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + + fabric2_fdb:transactional(fun(Tx) -> + IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + + ?assertEqual(erlfdb_tuple:pack({0, false}), IdVal), ?assertMatch([{_, <<"foobar">>}], AttVals) end). @@ -175,7 +217,7 @@ large_att({Db, _}) -> AttData = iolist_to_binary([ <<"foobar">> || _ <- lists:seq(1, 60000) ]), - Att1 = mk_att(<<"long.txt">>, AttData), + Att1 = mk_att(<<"long.txt">>, AttData, gzip), {ok, _} = create_doc(Db, DocId, [Att1]), ?assertEqual(#{<<"long.txt">> => AttData}, read_atts(Db, DocId)), @@ -204,12 +246,16 @@ att_on_conflict_isolation({Db, _}) -> mk_att(Name, Data) -> + mk_att(Name, Data, identity). 
+ + +mk_att(Name, Data, Encoding) -> couch_att:new([ {name, Name}, {type, <<"application/octet-stream">>}, {att_len, size(Data)}, {data, Data}, - {encoding, identity}, + {encoding, Encoding}, {md5, <<>>} ]). |