diff options
Diffstat (limited to 'src/couch/src/couch_file.erl')
-rw-r--r-- | src/couch/src/couch_file.erl | 108 |
1 files changed, 79 insertions, 29 deletions
diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl index 514d4e3d9..09bc5f28f 100644 --- a/src/couch/src/couch_file.erl +++ b/src/couch/src/couch_file.erl @@ -23,6 +23,8 @@ -define(IS_OLD_STATE(S), is_pid(S#file.db_monitor)). -define(PREFIX_SIZE, 5). -define(DEFAULT_READ_COUNT, 1024). +-define(WRITE_XXHASH_CHECKSUMS_KEY, {?MODULE, write_xxhash_checksums}). +-define(WRITE_XXHASH_CHECKSUMS_DEFAULT, false). -type block_id() :: non_neg_integer(). -type location() :: non_neg_integer(). @@ -55,6 +57,9 @@ %% helper functions -export([process_info/1]). +% test helper functions +-export([reset_checksum_persistent_term_config/0]). + %%---------------------------------------------------------------------- %% Args: Valid Options are [create] and [create,overwrite]. %% Files are opened in read/write mode. @@ -142,8 +147,8 @@ assemble_file_chunk(Bin) -> [<<0:1/integer, (iolist_size(Bin)):31/integer>>, Bin]. assemble_file_chunk_and_checksum(Bin) -> - Md5 = couch_hash:md5_hash(Bin), - [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Md5, Bin]. + Checksum = generate_checksum(Bin), + [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Checksum, Bin]. %%---------------------------------------------------------------------- %% Purpose: Reads a term from a file that was written with append_term @@ -169,8 +174,8 @@ pread_binary(Fd, Pos) -> pread_iolist(Fd, Pos) -> case ioq:call(Fd, {pread_iolist, Pos}, erlang:get(io_priority)) of - {ok, IoList, Md5} -> - {ok, verify_md5(Fd, Pos, IoList, Md5)}; + {ok, IoList, Checksum} -> + {ok, verify_checksum(Fd, Pos, IoList, Checksum)}; Error -> Error end. @@ -191,13 +196,13 @@ pread_binaries(Fd, PosList) -> pread_iolists(Fd, PosList) -> case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of - {ok, DataMd5s} -> + {ok, DataAndChecksums} -> Data = lists:zipwith( - fun(Pos, {IoList, Md5}) -> - verify_md5(Fd, Pos, IoList, Md5) + fun(Pos, {IoList, Checksum}) -> + verify_checksum(Fd, Pos, IoList, Checksum) end, PosList, - DataMd5s + DataAndChecksums ), {ok, Data}; Error -> @@ -400,9 +405,9 @@ read_header(Fd) -> write_header(Fd, Data) -> Bin = term_to_binary(Data), - Md5 = couch_hash:md5_hash(Bin), + Checksum = generate_checksum(Bin), % now we assemble the final header binary and write to disk - FinalBin = <<Md5/binary, Bin/binary>>, + FinalBin = <<Checksum/binary, Bin/binary>>, ioq:call(Fd, {write_header, FinalBin}, erlang:get(io_priority)). init_status_error(ReturnPid, Ref, Error) -> @@ -504,11 +509,11 @@ handle_call({pread_iolist, Pos}, _From, File) -> update_read_timestamp(), {LenIolist, NextPos} = read_raw_iolist_int(File, Pos, 4), case iolist_to_binary(LenIolist) of - % an MD5-prefixed term + % an checksum-prefixed term <<1:1/integer, Len:31/integer>> -> - {Md5AndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16), - {Md5, IoList} = extract_md5(Md5AndIoList), - {reply, {ok, IoList, Md5}, File}; + {ChecksumAndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16), + {Checksum, IoList} = extract_checksum(ChecksumAndIoList), + {reply, {ok, IoList, Checksum}, File}; <<0:1/integer, Len:31/integer>> -> {Iolist, _} = read_raw_iolist_int(File, NextPos, Len), {reply, {ok, Iolist, <<>>}, File} @@ -520,7 +525,7 @@ handle_call({pread_iolists, PosL}, _From, File) -> LocNums2 = lists:map( fun({LenIoList, NextPos}) -> case iolist_to_binary(LenIoList) of - % an MD5-prefixed term + % a checksum-prefixed term <<1:1/integer, Len:31/integer>> -> {NextPos, Len + 16}; <<0:1/integer, Len:31/integer>> -> @@ -534,8 +539,8 @@ handle_call({pread_iolists, PosL}, _From, File) -> fun({LenIoList, _}, {IoList, _}) -> case iolist_to_binary(LenIoList) of <<1:1/integer, _:31/integer>> -> - {Md5, IoList} = extract_md5(IoList), - {IoList, Md5}; + {Checksum, IoList} = extract_checksum(IoList), + {IoList, Checksum}; <<0:1/integer, _:31/integer>> -> {IoList, <<>>} end @@ -674,9 +679,15 @@ load_header(Fd, Pos, HeaderLen, RestBlock) -> {ok, Missing} = file:pread(Fd, ReadStart, ReadLen), <<RestBlock/binary, Missing/binary>> end, - <<Md5Sig:16/binary, HeaderBin/binary>> = + <<Checksum:16/binary, HeaderBin/binary>> = iolist_to_binary(remove_block_prefixes(?PREFIX_SIZE, RawBin)), - Md5Sig = couch_hash:md5_hash(HeaderBin), + case exxhash:xxhash128(HeaderBin) of + Checksum -> + ok; + <<_/binary>> -> + Checksum = couch_hash:md5_hash(HeaderBin), + legacy_checksums_stats_update() + end, {ok, HeaderBin}. %% Read multiple block locations using a single file:pread/2. @@ -779,10 +790,10 @@ get_pread_locnum(File, Pos, Len) -> {Pos, TotalBytes} end. --spec extract_md5(iolist()) -> {binary(), iolist()}. -extract_md5(FullIoList) -> - {Md5List, IoList} = split_iolist(FullIoList, 16, []), - {iolist_to_binary(Md5List), IoList}. +-spec extract_checksum(iolist()) -> {binary(), iolist()}. +extract_checksum(FullIoList) -> + {ChecksumList, IoList} = split_iolist(FullIoList, 16, []), + {iolist_to_binary(ChecksumList), IoList}. calculate_total_read_len(0, FinalLen) -> calculate_total_read_len(1, FinalLen) + 1; @@ -852,15 +863,23 @@ monitored_by_pids() -> {monitored_by, PidsAndRefs} = process_info(self(), monitored_by), lists:filter(fun is_pid/1, PidsAndRefs). -verify_md5(_Fd, _Pos, IoList, <<>>) -> +verify_checksum(_Fd, _Pos, IoList, <<>>) -> IoList; -verify_md5(Fd, Pos, IoList, Md5) -> - case couch_hash:md5_hash(IoList) of - Md5 -> IoList; - _ -> report_md5_error(Fd, Pos) +verify_checksum(Fd, Pos, IoList, Checksum) -> + case exxhash:xxhash128(iolist_to_binary(IoList)) of + Checksum -> + IoList; + <<_/binary>> -> + case couch_hash:md5_hash(IoList) of + Checksum -> + legacy_checksums_stats_update(), + IoList; + _ -> + report_checksum_error(Fd, Pos) + end end. -report_md5_error(Fd, Pos) -> +report_checksum_error(Fd, Pos) -> couch_log:emergency("File corruption in ~p at position ~B", [Fd, Pos]), exit({file_corruption, <<"file corruption">>}). @@ -906,6 +925,37 @@ reset_eof(#file{} = File) -> {ok, Eof} = file:position(File#file.fd, eof), File#file{eof = Eof}. +-spec generate_checksum(binary()) -> <<_:128>>. +generate_checksum(Bin) when is_binary(Bin) -> + case generate_xxhash_checksums() of + true -> <<_:128>> = exxhash:xxhash128(Bin); + false -> <<_:128>> = couch_hash:md5_hash(Bin) + end. + +legacy_checksums_stats_update() -> + % Bump stats only if we're writing new checksums. + case generate_xxhash_checksums() of + true -> couch_stats:increment_counter([couchdb, legacy_checksums]); + false -> ok + end. + +reset_checksum_persistent_term_config() -> + persistent_term:erase(?WRITE_XXHASH_CHECKSUMS_KEY). + +generate_xxhash_checksums() -> + % Caching the config value here as we'd need to call this per file chunk + % and also from various processes (not just couch_file pids). Node must be + % restarted for the new value to take effect. + case persistent_term:get(?WRITE_XXHASH_CHECKSUMS_KEY, not_cached) of + not_cached -> + Default = ?WRITE_XXHASH_CHECKSUMS_DEFAULT, + Val = config:get_boolean("couchdb", "write_xxhash_checksums", Default), + persistent_term:put(?WRITE_XXHASH_CHECKSUMS_KEY, Val), + Val; + Val when is_boolean(Val) -> + Val + end. + -ifdef(TEST). -include_lib("couch/include/couch_eunit.hrl"). |