summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2023-05-06 02:20:43 -0400
committerNick Vatamaniuc <vatamane@apache.org>2023-05-13 18:28:00 -0400
commit88a0de8d23d39df8bb4d03cd3e05c5e97e4c1c85 (patch)
treec8e40a978e609485c16846acf4ce460a5cc0d910
parentdcf57c710c52b0a55c596f65f07b85855130f66f (diff)
downloadcouchdb-try-xxhash-for-couch-file.tar.gz
Use xxHash for couch_file checksumstry-xxhash-for-couch-file
Check xxhash first, since it's faster [1], and if that fails, check the slower md5 version. Bump a stats counter to indicate if there are still any md5 checksums found during normal cluster operation. Initially default to not writing xxHash checksums, only reading them. There is a config setting and tests to assert that it's possible to upgrade and downgrade. [1] Comparison of hashing a 4KB block (units are microseconds). ``` (node1@127.0.0.1)20> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> do_nothing_overhead end, lists:seq(1, 1000000)) end), (T/1000000.0). 0.167425 (node1@127.0.0.1)21> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> exxhash:xxhash128(B) end, lists:seq(1, 1000000)) end), (T/1000000). 0.770687 (node1@127.0.0.1)22> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> crypto:hash(md5, B) end, lists:seq(1, 1000000)) end), (T/1000000). 6.205445 ```
-rw-r--r--rel/overlay/etc/default.ini5
-rw-r--r--src/couch/priv/stats_descriptions.cfg4
-rw-r--r--src/couch/src/couch_file.erl108
-rw-r--r--src/couch/test/eunit/couch_file_tests.erl137
4 files changed, 225 insertions, 29 deletions
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 14b2a5362..2cc195d55 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -95,6 +95,11 @@ view_index_dir = {{view_index_dir}}
; Sets the log level for informational compaction related entries.
;compaction_log_level = info
+; Enable writing xxHash checksums in .couch files. The current
+; default is false. When the value is false both xxHash and legacy
+; checksums can be read and verified.
+;write_xxhash_checksums = false
+
[purge]
; Allowed maximum number of documents in one purge request
;max_document_id_number = 100
diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index 7c8fd94cb..dc426c387 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -290,6 +290,10 @@
{type, histogram},
{desc, <<"duration of validate_doc_update function calls">>}
]}.
+{[couchdb, legacy_checksums], [
+ {type, counter},
+ {desc, <<"number of legacy checksums found in couch_file instances">>}
+]}.
{[pread, exceed_eof], [
{type, counter},
{desc, <<"number of the attempts to read beyond end of db file">>}
diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index 514d4e3d9..09bc5f28f 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -23,6 +23,8 @@
-define(IS_OLD_STATE(S), is_pid(S#file.db_monitor)).
-define(PREFIX_SIZE, 5).
-define(DEFAULT_READ_COUNT, 1024).
+-define(WRITE_XXHASH_CHECKSUMS_KEY, {?MODULE, write_xxhash_checksums}).
+-define(WRITE_XXHASH_CHECKSUMS_DEFAULT, false).
-type block_id() :: non_neg_integer().
-type location() :: non_neg_integer().
@@ -55,6 +57,9 @@
%% helper functions
-export([process_info/1]).
+% test helper functions
+-export([reset_checksum_persistent_term_config/0]).
+
%%----------------------------------------------------------------------
%% Args: Valid Options are [create] and [create,overwrite].
%% Files are opened in read/write mode.
@@ -142,8 +147,8 @@ assemble_file_chunk(Bin) ->
[<<0:1/integer, (iolist_size(Bin)):31/integer>>, Bin].
assemble_file_chunk_and_checksum(Bin) ->
- Md5 = couch_hash:md5_hash(Bin),
- [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Md5, Bin].
+ Checksum = generate_checksum(Bin),
+ [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Checksum, Bin].
%%----------------------------------------------------------------------
%% Purpose: Reads a term from a file that was written with append_term
@@ -169,8 +174,8 @@ pread_binary(Fd, Pos) ->
pread_iolist(Fd, Pos) ->
case ioq:call(Fd, {pread_iolist, Pos}, erlang:get(io_priority)) of
- {ok, IoList, Md5} ->
- {ok, verify_md5(Fd, Pos, IoList, Md5)};
+ {ok, IoList, Checksum} ->
+ {ok, verify_checksum(Fd, Pos, IoList, Checksum)};
Error ->
Error
end.
@@ -191,13 +196,13 @@ pread_binaries(Fd, PosList) ->
pread_iolists(Fd, PosList) ->
case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of
- {ok, DataMd5s} ->
+ {ok, DataAndChecksums} ->
Data = lists:zipwith(
- fun(Pos, {IoList, Md5}) ->
- verify_md5(Fd, Pos, IoList, Md5)
+ fun(Pos, {IoList, Checksum}) ->
+ verify_checksum(Fd, Pos, IoList, Checksum)
end,
PosList,
- DataMd5s
+ DataAndChecksums
),
{ok, Data};
Error ->
@@ -400,9 +405,9 @@ read_header(Fd) ->
write_header(Fd, Data) ->
Bin = term_to_binary(Data),
- Md5 = couch_hash:md5_hash(Bin),
+ Checksum = generate_checksum(Bin),
% now we assemble the final header binary and write to disk
- FinalBin = <<Md5/binary, Bin/binary>>,
+ FinalBin = <<Checksum/binary, Bin/binary>>,
ioq:call(Fd, {write_header, FinalBin}, erlang:get(io_priority)).
init_status_error(ReturnPid, Ref, Error) ->
@@ -504,11 +509,11 @@ handle_call({pread_iolist, Pos}, _From, File) ->
update_read_timestamp(),
{LenIolist, NextPos} = read_raw_iolist_int(File, Pos, 4),
case iolist_to_binary(LenIolist) of
- % an MD5-prefixed term
+ % a checksum-prefixed term
<<1:1/integer, Len:31/integer>> ->
- {Md5AndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16),
- {Md5, IoList} = extract_md5(Md5AndIoList),
- {reply, {ok, IoList, Md5}, File};
+ {ChecksumAndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16),
+ {Checksum, IoList} = extract_checksum(ChecksumAndIoList),
+ {reply, {ok, IoList, Checksum}, File};
<<0:1/integer, Len:31/integer>> ->
{Iolist, _} = read_raw_iolist_int(File, NextPos, Len),
{reply, {ok, Iolist, <<>>}, File}
@@ -520,7 +525,7 @@ handle_call({pread_iolists, PosL}, _From, File) ->
LocNums2 = lists:map(
fun({LenIoList, NextPos}) ->
case iolist_to_binary(LenIoList) of
- % an MD5-prefixed term
+ % a checksum-prefixed term
<<1:1/integer, Len:31/integer>> ->
{NextPos, Len + 16};
<<0:1/integer, Len:31/integer>> ->
@@ -534,8 +539,8 @@ handle_call({pread_iolists, PosL}, _From, File) ->
fun({LenIoList, _}, {IoList, _}) ->
case iolist_to_binary(LenIoList) of
<<1:1/integer, _:31/integer>> ->
- {Md5, IoList} = extract_md5(IoList),
- {IoList, Md5};
+ {Checksum, IoList} = extract_checksum(IoList),
+ {IoList, Checksum};
<<0:1/integer, _:31/integer>> ->
{IoList, <<>>}
end
@@ -674,9 +679,15 @@ load_header(Fd, Pos, HeaderLen, RestBlock) ->
{ok, Missing} = file:pread(Fd, ReadStart, ReadLen),
<<RestBlock/binary, Missing/binary>>
end,
- <<Md5Sig:16/binary, HeaderBin/binary>> =
+ <<Checksum:16/binary, HeaderBin/binary>> =
iolist_to_binary(remove_block_prefixes(?PREFIX_SIZE, RawBin)),
- Md5Sig = couch_hash:md5_hash(HeaderBin),
+ case exxhash:xxhash128(HeaderBin) of
+ Checksum ->
+ ok;
+ <<_/binary>> ->
+ Checksum = couch_hash:md5_hash(HeaderBin),
+ legacy_checksums_stats_update()
+ end,
{ok, HeaderBin}.
%% Read multiple block locations using a single file:pread/2.
@@ -779,10 +790,10 @@ get_pread_locnum(File, Pos, Len) ->
{Pos, TotalBytes}
end.
--spec extract_md5(iolist()) -> {binary(), iolist()}.
-extract_md5(FullIoList) ->
- {Md5List, IoList} = split_iolist(FullIoList, 16, []),
- {iolist_to_binary(Md5List), IoList}.
+-spec extract_checksum(iolist()) -> {binary(), iolist()}.
+extract_checksum(FullIoList) ->
+ {ChecksumList, IoList} = split_iolist(FullIoList, 16, []),
+ {iolist_to_binary(ChecksumList), IoList}.
calculate_total_read_len(0, FinalLen) ->
calculate_total_read_len(1, FinalLen) + 1;
@@ -852,15 +863,23 @@ monitored_by_pids() ->
{monitored_by, PidsAndRefs} = process_info(self(), monitored_by),
lists:filter(fun is_pid/1, PidsAndRefs).
-verify_md5(_Fd, _Pos, IoList, <<>>) ->
+verify_checksum(_Fd, _Pos, IoList, <<>>) ->
IoList;
-verify_md5(Fd, Pos, IoList, Md5) ->
- case couch_hash:md5_hash(IoList) of
- Md5 -> IoList;
- _ -> report_md5_error(Fd, Pos)
+verify_checksum(Fd, Pos, IoList, Checksum) ->
+ case exxhash:xxhash128(iolist_to_binary(IoList)) of
+ Checksum ->
+ IoList;
+ <<_/binary>> ->
+ case couch_hash:md5_hash(IoList) of
+ Checksum ->
+ legacy_checksums_stats_update(),
+ IoList;
+ _ ->
+ report_checksum_error(Fd, Pos)
+ end
end.
-report_md5_error(Fd, Pos) ->
+report_checksum_error(Fd, Pos) ->
couch_log:emergency("File corruption in ~p at position ~B", [Fd, Pos]),
exit({file_corruption, <<"file corruption">>}).
@@ -906,6 +925,37 @@ reset_eof(#file{} = File) ->
{ok, Eof} = file:position(File#file.fd, eof),
File#file{eof = Eof}.
+% Compute the 128-bit checksum stored next to a chunk or header.
+% xxHash128 is used when writing xxHash checksums is enabled, MD5 otherwise;
+% either result is asserted to be exactly 128 bits.
+% Iolists are accepted as well as binaries: assemble_file_chunk_and_checksum/1
+% sizes its argument with iolist_size/1, so it may hand over either form.
+-spec generate_checksum(iodata()) -> <<_:128>>.
+generate_checksum(Bin) when is_binary(Bin) ->
+    case generate_xxhash_checksums() of
+        true -> <<_:128>> = exxhash:xxhash128(Bin);
+        false -> <<_:128>> = couch_hash:md5_hash(Bin)
+    end;
+generate_checksum(IoList) when is_list(IoList) ->
+    % Flatten once so both hash functions receive a plain binary.
+    generate_checksum(iolist_to_binary(IoList)).
+
+% Record that a legacy (MD5) checksum was encountered. The counter is bumped
+% only while this node is configured to write xxHash checksums.
+legacy_checksums_stats_update() ->
+    legacy_checksums_stats_update(generate_xxhash_checksums()).
+
+legacy_checksums_stats_update(true) ->
+    couch_stats:increment_counter([couchdb, legacy_checksums]);
+legacy_checksums_stats_update(false) ->
+    ok.
+
+% Test helper: drop the cached write_xxhash_checksums value so the next
+% lookup reads the setting from config again.
+-spec reset_checksum_persistent_term_config() -> boolean().
+reset_checksum_persistent_term_config() ->
+    CacheKey = ?WRITE_XXHASH_CHECKSUMS_KEY,
+    persistent_term:erase(CacheKey).
+
+% Tell whether new checksums should be written as xxHash.
+% The config value is cached in a persistent term because it is consulted
+% for every file chunk and from many processes, not only couch_file pids.
+% Once cached, a changed setting takes effect only after a node restart.
+generate_xxhash_checksums() ->
+    Key = ?WRITE_XXHASH_CHECKSUMS_KEY,
+    case persistent_term:get(Key, not_cached) of
+        Cached when is_boolean(Cached) ->
+            Cached;
+        not_cached ->
+            Configured = config:get_boolean(
+                "couchdb",
+                "write_xxhash_checksums",
+                ?WRITE_XXHASH_CHECKSUMS_DEFAULT
+            ),
+            persistent_term:put(Key, Configured),
+            Configured
+    end.
+
-ifdef(TEST).
-include_lib("couch/include/couch_eunit.hrl").
diff --git a/src/couch/test/eunit/couch_file_tests.erl b/src/couch/test/eunit/couch_file_tests.erl
index 1b54cd70e..fbe23ded7 100644
--- a/src/couch/test/eunit/couch_file_tests.erl
+++ b/src/couch/test/eunit/couch_file_tests.erl
@@ -551,3 +551,140 @@ fake_fsync_fd() ->
{'$gen_call', From, sync} ->
gen:reply(From, {error, eio})
end.
+
+% EUnit generator for the checksum tests. Each test runs between its own
+% setup_checksum/0 and teardown_checksum/1 calls.
+checksum_test_() ->
+    Tests = [
+        ?TDEF_FE(t_write_read_xxhash_checksums),
+        ?TDEF_FE(t_downgrade_xxhash_checksums),
+        ?TDEF_FE(t_read_legacy_checksums_after_upgrade)
+    ],
+    {foreach, fun setup_checksum/0, fun teardown_checksum/1, Tests}.
+
+% Per-test fixture: pick a temp file path, start couch and begin with
+% xxHash checksum writing switched off (not persisted to disk).
+setup_checksum() ->
+    TmpPath = ?tempfile(),
+    CouchCtx = test_util:start_couch(),
+    Persist = false,
+    config:set("couchdb", "write_xxhash_checksums", "false", Persist),
+    {CouchCtx, TmpPath}.
+
+% Per-test cleanup: remove the temp file, unload any mocks, stop couch and
+% finally clear the cached checksum setting so it cannot leak into later
+% tests.
+teardown_checksum({CouchCtx, TmpPath}) ->
+    file:delete(TmpPath),
+    meck:unload(),
+    test_util:stop_couch(CouchCtx),
+    couch_file:reset_checksum_persistent_term_config().
+
+% With xxHash writing enabled, a header and a chunk written to a new file
+% read back intact and no legacy checksum is counted.
+t_write_read_xxhash_checksums({_Ctx, Path}) ->
+    enable_xxhash(),
+    Header = header,
+    Bin = <<"bin">>,
+
+    {ok, WriteFd} = couch_file:open(Path, [create]),
+    ok = couch_file:write_header(WriteFd, Header),
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(WriteFd, Chunk),
+    couch_file:close(WriteFd),
+
+    {ok, ReadFd} = couch_file:open(Path, []),
+    ?assertEqual({ok, Header}, couch_file:read_header(ReadFd)),
+    ?assertEqual({ok, Bin}, couch_file:pread_binary(ReadFd, Pos)),
+    ?assertEqual(0, legacy_stats()),
+    couch_file:close(ReadFd).
+
+% A file written with xxHash checksums stays readable after the setting is
+% switched back to legacy checksums, and legacy chunks appended afterwards
+% can be read alongside the xxHash ones.
+t_downgrade_xxhash_checksums({_Ctx, Path}) ->
+    Header = header,
+    Bin = <<"bin">>,
+    OtherBin = <<"otherbin">>,
+
+    % We're in the future and writing xxHash checksums by default
+    enable_xxhash(),
+    {ok, NewFd} = couch_file:open(Path, [create]),
+    ok = couch_file:write_header(NewFd, Header),
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(NewFd, Chunk),
+    couch_file:close(NewFd),
+
+    % The future was broken, so we travel back, but we still know how to
+    % interpret the future checksums without crashing
+    disable_xxhash(),
+    {ok, DowngradedFd} = couch_file:open(Path, []),
+    ?assertEqual({ok, Header}, couch_file:read_header(DowngradedFd)),
+    ?assertEqual({ok, Bin}, couch_file:pread_binary(DowngradedFd, Pos)),
+
+    % Append a chunk with a legacy checksum, then make sure both the legacy
+    % and the xxHash chunks can be read from the same file
+    OtherChunk = couch_file:assemble_file_chunk_and_checksum(OtherBin),
+    {ok, OtherPos, _} = couch_file:append_raw_chunk(DowngradedFd, OtherChunk),
+    couch_file:close(DowngradedFd),
+
+    {ok, MixedFd} = couch_file:open(Path, []),
+    ?assertEqual({ok, Header}, couch_file:read_header(MixedFd)),
+    ?assertEqual({ok, Bin}, couch_file:pread_binary(MixedFd, Pos)),
+    ?assertEqual({ok, OtherBin}, couch_file:pread_binary(MixedFd, OtherPos)),
+    couch_file:close(MixedFd).
+
+% A file written with legacy checksums stays readable once xxHash writing is
+% enabled, every legacy checksum read is counted, and xxHash chunks appended
+% afterwards can be read alongside the legacy ones.
+t_read_legacy_checksums_after_upgrade({_Ctx, Path}) ->
+    Header = header,
+    Bin = <<"bin">>,
+    OtherBin = <<"otherbin">>,
+
+    % We're in the past and writing legacy checksums by default
+    disable_xxhash(),
+    {ok, LegacyFd} = couch_file:open(Path, [create]),
+    ok = couch_file:write_header(LegacyFd, Header),
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(LegacyFd, Chunk),
+    couch_file:close(LegacyFd),
+
+    % We upgrade and xxHash checksums are now the default, but we can
+    % still read legacy checksums.
+    enable_xxhash(),
+    {ok, UpgradedFd} = couch_file:open(Path, []),
+    ?assertEqual({ok, Header}, couch_file:read_header(UpgradedFd)),
+    ?assertEqual({ok, Bin}, couch_file:pread_binary(UpgradedFd, Pos)),
+    % one header, one chunk
+    ?assertEqual(2, legacy_stats()),
+
+    % Append a chunk with an xxHash checksum, then make sure both the legacy
+    % and the new chunks can be read from the same file
+    OtherChunk = couch_file:assemble_file_chunk_and_checksum(OtherBin),
+    {ok, OtherPos, _} = couch_file:append_raw_chunk(UpgradedFd, OtherChunk),
+    couch_file:close(UpgradedFd),
+
+    % Start counting from zero again before the final round of reads
+    reset_legacy_checksum_stats(),
+    {ok, MixedFd} = couch_file:open(Path, []),
+    ?assertEqual({ok, Header}, couch_file:read_header(MixedFd)),
+    ?assertEqual({ok, Bin}, couch_file:pread_binary(MixedFd, Pos)),
+    ?assertEqual({ok, OtherBin}, couch_file:pread_binary(MixedFd, OtherPos)),
+    % one header, legacy chunk, not counting new chunk
+    ?assertEqual(2, legacy_stats()),
+    couch_file:close(MixedFd).
+
+% Switch the node to writing xxHash checksums for the current test.
+% The config value is stored before the cached copy is dropped. If the cache
+% were cleared first, any process computing a checksum in between would
+% re-cache the old setting and the switch would silently not take effect.
+% Returns the result of config:set/4.
+enable_xxhash() ->
+    Result = config:set("couchdb", "write_xxhash_checksums", "true", _Persist = false),
+    couch_file:reset_checksum_persistent_term_config(),
+    reset_legacy_checksum_stats(),
+    Result.
+
+% Switch the node back to writing legacy checksums for the current test.
+% The config value is stored before the cached copy is dropped. If the cache
+% were cleared first, any process computing a checksum in between would
+% re-cache the old setting and the switch would silently not take effect.
+% Returns the result of config:set/4.
+disable_xxhash() ->
+    Result = config:set("couchdb", "write_xxhash_checksums", "false", _Persist = false),
+    couch_file:reset_checksum_persistent_term_config(),
+    reset_legacy_checksum_stats(),
+    Result.
+
+% Current value of the legacy checksum counter.
+legacy_stats() ->
+    Metric = [couchdb, legacy_checksums],
+    couch_stats:sample(Metric).
+
+% Subtract the legacy checksum counter's current value from itself so the
+% next test starts counting from zero.
+reset_legacy_checksum_stats() ->
+    Metric = [couchdb, legacy_checksums],
+    couch_stats:decrement_counter(Metric, couch_stats:sample(Metric)).