From 88a0de8d23d39df8bb4d03cd3e05c5e97e4c1c85 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Sat, 6 May 2023 02:20:43 -0400 Subject: Use xxHash for couch_file checksums Check xxhash first, since it's faster [1], and if that fails, check the slower md5 version. Bump a stats counter to indicate if there are still any md5 checksums found during normal cluster operation. Initially default to not writing xxHash checksums, only reading them. There is a config setting and tests to assert that it's possible to upgrade and downgrade. [1] Comparison of hashing a 4KB block (units are microseconds). ``` (node1@127.0.0.1)20> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> do_nothing_overhead end, lists:seq(1, 1000000)) end), (T/1000000.0). 0.167425 (node1@127.0.0.1)21> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> exxhash:xxhash128(B) end, lists:seq(1, 1000000)) end), (T/1000000). 0.770687 (node1@127.0.0.1)22> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> crypto:hash(md5, B) end, lists:seq(1, 1000000)) end), (T/1000000). 6.205445 ``` --- rel/overlay/etc/default.ini | 5 ++ src/couch/priv/stats_descriptions.cfg | 4 + src/couch/src/couch_file.erl | 108 ++++++++++++++++------- src/couch/test/eunit/couch_file_tests.erl | 137 ++++++++++++++++++++++++++++++ 4 files changed, 225 insertions(+), 29 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 14b2a5362..2cc195d55 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -95,6 +95,11 @@ view_index_dir = {{view_index_dir}} ; Sets the log level for informational compaction related entries. ;compaction_log_level = info +; Enable writing xxHash checksums in .couch files. The current +; default is false. When the value is false both xxHash and legacy +; checksums can be read and verified.
+;write_xxhash_checksums = false + [purge] ; Allowed maximum number of documents in one purge request ;max_document_id_number = 100 diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg index 7c8fd94cb..dc426c387 100644 --- a/src/couch/priv/stats_descriptions.cfg +++ b/src/couch/priv/stats_descriptions.cfg @@ -290,6 +290,10 @@ {type, histogram}, {desc, <<"duration of validate_doc_update function calls">>} ]}. +{[couchdb, legacy_checksums], [ + {type, counter}, + {desc, <<"number of legacy checksums found in couch_file instances">>} +]}. {[pread, exceed_eof], [ {type, counter}, {desc, <<"number of the attempts to read beyond end of db file">>} diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl index 514d4e3d9..09bc5f28f 100644 --- a/src/couch/src/couch_file.erl +++ b/src/couch/src/couch_file.erl @@ -23,6 +23,8 @@ -define(IS_OLD_STATE(S), is_pid(S#file.db_monitor)). -define(PREFIX_SIZE, 5). -define(DEFAULT_READ_COUNT, 1024). +-define(WRITE_XXHASH_CHECKSUMS_KEY, {?MODULE, write_xxhash_checksums}). +-define(WRITE_XXHASH_CHECKSUMS_DEFAULT, false). -type block_id() :: non_neg_integer(). -type location() :: non_neg_integer(). @@ -55,6 +57,9 @@ %% helper functions -export([process_info/1]). +% test helper functions +-export([reset_checksum_persistent_term_config/0]). + %%---------------------------------------------------------------------- %% Args: Valid Options are [create] and [create,overwrite]. %% Files are opened in read/write mode. @@ -142,8 +147,8 @@ assemble_file_chunk(Bin) -> [<<0:1/integer, (iolist_size(Bin)):31/integer>>, Bin]. assemble_file_chunk_and_checksum(Bin) -> - Md5 = couch_hash:md5_hash(Bin), - [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Md5, Bin]. + Checksum = generate_checksum(Bin), + [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Checksum, Bin]. 
%%---------------------------------------------------------------------- %% Purpose: Reads a term from a file that was written with append_term @@ -169,8 +174,8 @@ pread_binary(Fd, Pos) -> pread_iolist(Fd, Pos) -> case ioq:call(Fd, {pread_iolist, Pos}, erlang:get(io_priority)) of - {ok, IoList, Md5} -> - {ok, verify_md5(Fd, Pos, IoList, Md5)}; + {ok, IoList, Checksum} -> + {ok, verify_checksum(Fd, Pos, IoList, Checksum)}; Error -> Error end. @@ -191,13 +196,13 @@ pread_binaries(Fd, PosList) -> pread_iolists(Fd, PosList) -> case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of - {ok, DataMd5s} -> + {ok, DataAndChecksums} -> Data = lists:zipwith( - fun(Pos, {IoList, Md5}) -> - verify_md5(Fd, Pos, IoList, Md5) + fun(Pos, {IoList, Checksum}) -> + verify_checksum(Fd, Pos, IoList, Checksum) end, PosList, - DataMd5s + DataAndChecksums ), {ok, Data}; Error -> @@ -400,9 +405,9 @@ read_header(Fd) -> write_header(Fd, Data) -> Bin = term_to_binary(Data), - Md5 = couch_hash:md5_hash(Bin), + Checksum = generate_checksum(Bin), % now we assemble the final header binary and write to disk - FinalBin = <<Md5/binary, Bin/binary>>, + FinalBin = <<Checksum/binary, Bin/binary>>, ioq:call(Fd, {write_header, FinalBin}, erlang:get(io_priority)).
init_status_error(ReturnPid, Ref, Error) -> @@ -504,11 +509,11 @@ handle_call({pread_iolist, Pos}, _From, File) -> update_read_timestamp(), {LenIolist, NextPos} = read_raw_iolist_int(File, Pos, 4), case iolist_to_binary(LenIolist) of - % an MD5-prefixed term + % a checksum-prefixed term <<1:1/integer, Len:31/integer>> -> - {Md5AndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16), - {Md5, IoList} = extract_md5(Md5AndIoList), - {reply, {ok, IoList, Md5}, File}; + {ChecksumAndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16), + {Checksum, IoList} = extract_checksum(ChecksumAndIoList), + {reply, {ok, IoList, Checksum}, File}; <<0:1/integer, Len:31/integer>> -> {Iolist, _} = read_raw_iolist_int(File, NextPos, Len), {reply, {ok, Iolist, <<>>}, File} @@ -520,7 +525,7 @@ handle_call({pread_iolists, PosL}, _From, File) -> LocNums2 = lists:map( fun({LenIoList, NextPos}) -> case iolist_to_binary(LenIoList) of - % an MD5-prefixed term + % a checksum-prefixed term <<1:1/integer, Len:31/integer>> -> {NextPos, Len + 16}; <<0:1/integer, Len:31/integer>> -> @@ -534,8 +539,8 @@ handle_call({pread_iolists, PosL}, _From, File) -> fun({LenIoList, _}, {IoList, _}) -> case iolist_to_binary(LenIoList) of <<1:1/integer, _:31/integer>> -> - {Md5, IoList} = extract_md5(IoList), - {IoList, Md5}; + {Checksum, IoList} = extract_checksum(IoList), + {IoList, Checksum}; <<0:1/integer, _:31/integer>> -> {IoList, <<>>} end @@ -674,9 +679,15 @@ load_header(Fd, Pos, HeaderLen, RestBlock) -> {ok, Missing} = file:pread(Fd, ReadStart, ReadLen), <<RestBlock/binary, Missing/binary>> end, - <<Md5Sig:16/binary, HeaderBin/binary>> = + <<Checksum:16/binary, HeaderBin/binary>> = iolist_to_binary(remove_block_prefixes(?PREFIX_SIZE, RawBin)), - Md5Sig = couch_hash:md5_hash(HeaderBin), + case exxhash:xxhash128(HeaderBin) of + Checksum -> + ok; + <<_/binary>> -> + Checksum = couch_hash:md5_hash(HeaderBin), + legacy_checksums_stats_update() + end, {ok, HeaderBin}. %% Read multiple block locations using a single file:pread/2. @@ -779,10 +790,10 @@ get_pread_locnum(File, Pos, Len) -> {Pos, TotalBytes} end.
--spec extract_md5(iolist()) -> {binary(), iolist()}. -extract_md5(FullIoList) -> - {Md5List, IoList} = split_iolist(FullIoList, 16, []), - {iolist_to_binary(Md5List), IoList}. +-spec extract_checksum(iolist()) -> {binary(), iolist()}. +extract_checksum(FullIoList) -> + {ChecksumList, IoList} = split_iolist(FullIoList, 16, []), + {iolist_to_binary(ChecksumList), IoList}. calculate_total_read_len(0, FinalLen) -> calculate_total_read_len(1, FinalLen) + 1; @@ -852,15 +863,23 @@ monitored_by_pids() -> {monitored_by, PidsAndRefs} = process_info(self(), monitored_by), lists:filter(fun is_pid/1, PidsAndRefs). -verify_md5(_Fd, _Pos, IoList, <<>>) -> +verify_checksum(_Fd, _Pos, IoList, <<>>) -> IoList; -verify_md5(Fd, Pos, IoList, Md5) -> - case couch_hash:md5_hash(IoList) of - Md5 -> IoList; - _ -> report_md5_error(Fd, Pos) +verify_checksum(Fd, Pos, IoList, Checksum) -> + case exxhash:xxhash128(iolist_to_binary(IoList)) of + Checksum -> + IoList; + <<_/binary>> -> + case couch_hash:md5_hash(IoList) of + Checksum -> + legacy_checksums_stats_update(), + IoList; + _ -> + report_checksum_error(Fd, Pos) + end end. -report_md5_error(Fd, Pos) -> +report_checksum_error(Fd, Pos) -> couch_log:emergency("File corruption in ~p at position ~B", [Fd, Pos]), exit({file_corruption, <<"file corruption">>}). @@ -906,6 +925,37 @@ reset_eof(#file{} = File) -> {ok, Eof} = file:position(File#file.fd, eof), File#file{eof = Eof}. +-spec generate_checksum(binary()) -> <<_:128>>. +generate_checksum(Bin) when is_binary(Bin) -> + case generate_xxhash_checksums() of + true -> <<_:128>> = exxhash:xxhash128(Bin); + false -> <<_:128>> = couch_hash:md5_hash(Bin) + end. + +legacy_checksums_stats_update() -> + % Bump stats only if we're writing new checksums. + case generate_xxhash_checksums() of + true -> couch_stats:increment_counter([couchdb, legacy_checksums]); + false -> ok + end. + +reset_checksum_persistent_term_config() -> + persistent_term:erase(?WRITE_XXHASH_CHECKSUMS_KEY). 
+ +generate_xxhash_checksums() -> + % Caching the config value here as we'd need to call this per file chunk + % and also from various processes (not just couch_file pids). Node must be + % restarted for the new value to take effect. + case persistent_term:get(?WRITE_XXHASH_CHECKSUMS_KEY, not_cached) of + not_cached -> + Default = ?WRITE_XXHASH_CHECKSUMS_DEFAULT, + Val = config:get_boolean("couchdb", "write_xxhash_checksums", Default), + persistent_term:put(?WRITE_XXHASH_CHECKSUMS_KEY, Val), + Val; + Val when is_boolean(Val) -> + Val + end. + -ifdef(TEST). -include_lib("couch/include/couch_eunit.hrl"). diff --git a/src/couch/test/eunit/couch_file_tests.erl b/src/couch/test/eunit/couch_file_tests.erl index 1b54cd70e..fbe23ded7 100644 --- a/src/couch/test/eunit/couch_file_tests.erl +++ b/src/couch/test/eunit/couch_file_tests.erl @@ -551,3 +551,140 @@ fake_fsync_fd() -> {'$gen_call', From, sync} -> gen:reply(From, {error, eio}) end. + +checksum_test_() -> + { + foreach, + fun setup_checksum/0, + fun teardown_checksum/1, + [ + ?TDEF_FE(t_write_read_xxhash_checksums), + ?TDEF_FE(t_downgrade_xxhash_checksums), + ?TDEF_FE(t_read_legacy_checksums_after_upgrade) + ] + }. + +setup_checksum() -> + Path = ?tempfile(), + Ctx = test_util:start_couch(), + config:set("couchdb", "write_xxhash_checksums", "false", _Persist = false), + {Ctx, Path}. + +teardown_checksum({Ctx, Path}) -> + file:delete(Path), + meck:unload(), + test_util:stop_couch(Ctx), + couch_file:reset_checksum_persistent_term_config(). 
+ +t_write_read_xxhash_checksums({_Ctx, Path}) -> + enable_xxhash(), + + {ok, Fd} = couch_file:open(Path, [create]), + Header = header, + ok = couch_file:write_header(Fd, Header), + Bin = <<"bin">>, + Chunk = couch_file:assemble_file_chunk_and_checksum(Bin), + {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk), + couch_file:close(Fd), + + {ok, Fd1} = couch_file:open(Path, []), + {ok, Header1} = couch_file:read_header(Fd1), + ?assertEqual(Header, Header1), + {ok, Bin1} = couch_file:pread_binary(Fd1, Pos), + ?assertEqual(Bin, Bin1), + ?assertEqual(0, legacy_stats()), + couch_file:close(Fd1). + +t_downgrade_xxhash_checksums({_Ctx, Path}) -> + % We're in the future and writing xxhash checksums by default + enable_xxhash(), + {ok, Fd} = couch_file:open(Path, [create]), + Header = header, + ok = couch_file:write_header(Fd, Header), + Bin = <<"bin">>, + Chunk = couch_file:assemble_file_chunk_and_checksum(Bin), + {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk), + couch_file:close(Fd), + + % The future was broken, we travel back, but still know how to + % interpret future checksums without crashing + disable_xxhash(), + {ok, Fd1} = couch_file:open(Path, []), + {ok, Header1} = couch_file:read_header(Fd1), + ?assertEqual(Header, Header1), + {ok, Bin1} = couch_file:pread_binary(Fd1, Pos), + ?assertEqual(Bin, Bin1), + + % We'll write some legacy checksums to the file and then ensure + % we can read both legacy and the new ones + OtherBin = <<"otherbin">>, + OtherChunk = couch_file:assemble_file_chunk_and_checksum(OtherBin), + {ok, OtherPos, _} = couch_file:append_raw_chunk(Fd1, OtherChunk), + couch_file:close(Fd1), + + {ok, Fd2} = couch_file:open(Path, []), + {ok, Header2} = couch_file:read_header(Fd2), + ?assertEqual(Header, Header2), + {ok, Bin2} = couch_file:pread_binary(Fd2, Pos), + {ok, OtherBin1} = couch_file:pread_binary(Fd2, OtherPos), + ?assertEqual(Bin, Bin2), + ?assertEqual(OtherBin, OtherBin1), + couch_file:close(Fd2).
+ +t_read_legacy_checksums_after_upgrade({_Ctx, Path}) -> + % We're in the past and writing legacy checksums by default + disable_xxhash(), + {ok, Fd} = couch_file:open(Path, [create]), + Header = header, + ok = couch_file:write_header(Fd, Header), + Bin = <<"bin">>, + Chunk = couch_file:assemble_file_chunk_and_checksum(Bin), + {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk), + couch_file:close(Fd), + + % We upgrade and xxhash checksums are now the default, but we can + % still read legacy checksums. + enable_xxhash(), + {ok, Fd1} = couch_file:open(Path, []), + {ok, Header1} = couch_file:read_header(Fd1), + ?assertEqual(Header, Header1), + {ok, Bin1} = couch_file:pread_binary(Fd1, Pos), + ?assertEqual(Bin, Bin1), + % one header, one chunk + ?assertEqual(2, legacy_stats()), + + % We'll write some new checksums to the file and then ensure + % we can read both legacy and the new ones + OtherBin = <<"otherbin">>, + OtherChunk = couch_file:assemble_file_chunk_and_checksum(OtherBin), + {ok, OtherPos, _} = couch_file:append_raw_chunk(Fd1, OtherChunk), + couch_file:close(Fd1), + + couch_stats:decrement_counter([couchdb, legacy_checksums], legacy_stats()), + {ok, Fd2} = couch_file:open(Path, []), + {ok, Header2} = couch_file:read_header(Fd2), + ?assertEqual(Header, Header2), + {ok, Bin2} = couch_file:pread_binary(Fd2, Pos), + {ok, OtherBin1} = couch_file:pread_binary(Fd2, OtherPos), + ?assertEqual(Bin, Bin2), + ?assertEqual(OtherBin, OtherBin1), + % one header, legacy chunk, not counting new chunk + ?assertEqual(2, legacy_stats()), + couch_file:close(Fd2). + +enable_xxhash() -> + couch_file:reset_checksum_persistent_term_config(), + reset_legacy_checksum_stats(), + config:set("couchdb", "write_xxhash_checksums", "true", _Persist = false). + +disable_xxhash() -> + couch_file:reset_checksum_persistent_term_config(), + reset_legacy_checksum_stats(), + config:set("couchdb", "write_xxhash_checksums", "false", _Persist = false).
+ +legacy_stats() -> + couch_stats:sample([couchdb, legacy_checksums]). + +reset_legacy_checksum_stats() -> + Counter = couch_stats:sample([couchdb, legacy_checksums]), + couch_stats:decrement_counter([couchdb, legacy_checksums], Counter). -- cgit v1.2.1