diff options
-rw-r--r-- | erts/emulator/beam/erl_term_hashing.c | 6 | ||||
-rw-r--r-- | erts/emulator/beam/jit/arm/instr_map.cpp | 5 | ||||
-rw-r--r-- | erts/emulator/beam/jit/x86/beam_asm.hpp | 37 | ||||
-rw-r--r-- | erts/emulator/beam/jit/x86/instr_map.cpp | 8 | ||||
-rw-r--r-- | erts/emulator/test/persistent_term_SUITE.erl | 153 |
5 files changed, 122 insertions, 87 deletions
diff --git a/erts/emulator/beam/erl_term_hashing.c b/erts/emulator/beam/erl_term_hashing.c index 938b287e85..904e493f8c 100644 --- a/erts/emulator/beam/erl_term_hashing.c +++ b/erts/emulator/beam/erl_term_hashing.c @@ -1376,14 +1376,16 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p) # if defined(__x86_64__) # define MIX(a,b,c) \ do { \ + Uint32 initial_hash = c; \ c = __builtin_ia32_crc32si(c, a); \ - c = __builtin_ia32_crc32si(c, b); \ + c = __builtin_ia32_crc32si(c + initial_hash, b); \ } while(0) # elif defined(__aarch64__) # define MIX(a,b,c) \ do { \ + Uint32 initial_hash = c; \ c = __crc32cw(c, a); \ - c = __crc32cw(c, b); \ + c = __crc32cw(c + initial_hash, b); \ } while(0) # else # error "No suitable CRC32 intrinsic available." diff --git a/erts/emulator/beam/jit/arm/instr_map.cpp b/erts/emulator/beam/jit/arm/instr_map.cpp index b0d82726a1..fd837cf93f 100644 --- a/erts/emulator/beam/jit/arm/instr_map.cpp +++ b/erts/emulator/beam/jit/arm/instr_map.cpp @@ -54,8 +54,9 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { a.add(upper, upper, constant); #if defined(ERL_INTERNAL_HASH_CRC32C) - a.crc32cw(hash, lower, hash); - a.crc32cw(hash, upper, hash); + a.crc32cw(lower, hash, lower); + a.add(hash, hash, lower); + a.crc32cw(hash, hash, upper); #else using rounds = std::initializer_list<std::tuple<a64::Gp, a64::Gp, a64::Gp, int>>; diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp index 126faeb4db..96b40b84b3 100644 --- a/erts/emulator/beam/jit/x86/beam_asm.hpp +++ b/erts/emulator/beam/jit/x86/beam_asm.hpp @@ -881,14 +881,27 @@ protected: from.setSize(0); to.setSize(0); - using vectors = std::initializer_list< - std::tuple<x86::Vec, Sint32, CpuFeatures::X86::Id>>; - for (const auto &spec : - vectors{{x86::zmm0, 8, CpuFeatures::X86::kAVX512_VL}, - {x86::zmm0, 8, CpuFeatures::X86::kAVX512_F}, - {x86::ymm0, 4, CpuFeatures::X86::kAVX}, - {x86::xmm0, 2, CpuFeatures::X86::kSSE}}) { - const auto &[vector_reg, vector_size, feature] = spec; + using vectors = std::initializer_list<std::tuple<x86::Vec, + Sint32, + x86::Inst::Id, + CpuFeatures::X86::Id>>; + for (const auto &spec : vectors{{x86::zmm0, + 8, + x86::Inst::kIdVmovups, + CpuFeatures::X86::kAVX512_VL}, + {x86::zmm0, + 8, + x86::Inst::kIdVmovups, + CpuFeatures::X86::kAVX512_F}, + {x86::ymm0, + 4, + x86::Inst::kIdVmovups, + CpuFeatures::X86::kAVX}, + {x86::xmm0, + 2, + x86::Inst::kIdMovups, + CpuFeatures::X86::kSSE}}) { + const auto &[vector_reg, vector_size, vector_inst, feature] = spec; if (!hasCpuFeature(feature)) { continue; @@ -898,8 +911,8 @@ protected: * largest vector size we're capable of. */ if (count <= vector_size * 4) { while (count >= vector_size) { - a.vmovups(vector_reg, from); - a.vmovups(to, vector_reg); + a.emit(vector_inst, vector_reg, from); + a.emit(vector_inst, to, vector_reg); from.addOffset(sizeof(UWord) * vector_size); to.addOffset(sizeof(UWord) * vector_size); @@ -920,8 +933,8 @@ protected: mov_imm(spill, -loop_size); a.bind(copy_next); { - a.vmovups(vector_reg, from); - a.vmovups(to, vector_reg); + a.emit(vector_inst, vector_reg, from); + a.emit(vector_inst, to, vector_reg); a.add(spill, imm(vector_size * sizeof(UWord))); a.short_().jne(copy_next); diff --git a/erts/emulator/beam/jit/x86/instr_map.cpp b/erts/emulator/beam/jit/x86/instr_map.cpp index b1ef0c966d..93847259de 100644 --- a/erts/emulator/beam/jit/x86/instr_map.cpp +++ b/erts/emulator/beam/jit/x86/instr_map.cpp @@ -47,13 +47,15 @@ static const Uint32 HCONST = 0x9E3779B9; * * Result is returned in ARG3. */ void BeamGlobalAssembler::emit_internal_hash_helper() { - x86::Gp hash = ARG3d, lower = ARG4d, upper = ARG5d; + x86::Gp hash = ARG3d, lower = ARG4d, upper = ARG5d, constant = ARG6d; - a.add(lower, ARG6d); - a.add(upper, ARG6d); + a.add(lower, constant); + a.add(upper, constant); #if defined(ERL_INTERNAL_HASH_CRC32C) + a.mov(constant, hash); a.crc32(hash, lower); + a.add(hash, constant); a.crc32(hash, upper); #else using rounds = diff --git a/erts/emulator/test/persistent_term_SUITE.erl b/erts/emulator/test/persistent_term_SUITE.erl index 35216aa78d..39dd2d3428 100644 --- a/erts/emulator/test/persistent_term_SUITE.erl +++ b/erts/emulator/test/persistent_term_SUITE.erl @@ -604,27 +604,54 @@ collisions_delete([], _) -> colliding_keys() -> %% Collisions found by find_colliding_keys() below - L = [[77674392,148027], - [103370644,950908], - [106444046,870178], - [22217246,735880], - [18088843,694607], - [63426007,612179], - [117354942,906431], - [121434305,94282311,816072], - [118441466,93873772,783366], - [124338174,1414801,123089], - [20240282,17113486,923647], - [126495528,61463488,164994], - [125341723,5729072,445539], - [127450932,80442669,348245], - [123354692,85724182,14241288,180793], - [99159367,65959274,61680971,289939], - [107637580,104512101,62639807,181644], - [139547511,51654420,2062545,151944], - [88078274,73031465,53388204,428872], - [141314238,75761379,55699508,861797], - [88045216,59272943,21030492,180903]], + %% ct:timetrap({minutes, 60}), + %% ct:pal("Colliding keys = ~p", [find_colliding_keys()]), + Collisions = + #{ + %% Collisions for Jenkins96 hashing. + 1268203079 => [[77674392,148027], + [103370644,950908], + [106444046,870178], + [22217246,735880], + [18088843,694607], + [63426007,612179], + [117354942,906431], + [121434305,94282311,816072], + [118441466,93873772,783366], + [124338174,1414801,123089], + [20240282,17113486,923647], + [126495528,61463488,164994], + [125341723,5729072,445539], + [127450932,80442669,348245], + [123354692,85724182,14241288,180793], + [99159367,65959274,61680971,289939], + [107637580,104512101,62639807,181644], + [139547511,51654420,2062545,151944], + [88078274,73031465,53388204,428872], + [141314238,75761379,55699508,861797], + [88045216,59272943,21030492,180903]], + %% Collisions for CRC32-C hashing. + 1982459178 => [[-4294967296,654663773], + [-3758096384,117792861], + [-3221225472,1728405597], + [-2684354560,1191534685], + [-2147483648,2706162303], + [-1610612736,2169291391], + [-1073741824,3779904127], + [-536870912,3243033215], + [-3640303523,0], + [-4177174435,536870912], + [-2566561699,1073741824], + [-3103432611,1610612736], + [-1588804993,2147483648], + [-2125675905,2684354560], + [-515063169,3221225472], + [-1051934081,3758096384]] + }, + + Key = internal_hash(2), + ct:pal("internal_hash(2) = ~p", [Key]), + #{ Key := L } = Collisions, %% Verify that the keys still collide (this will fail if the %% internal hash function has been changed). @@ -649,57 +676,47 @@ internal_hash(Term) -> erts_debug:get_internal_state({internal_hash,Term}). %% Use this function to (re)generate the list in colliding_keys/0 +%% +%% Grab a coffee, it will take a while. find_colliding_keys() -> - MaxCollSz = 4, - OfEachSz = 7, erts_debug:set_internal_state(available_internal_state, true), - MaxInserts = 1 bsl 20, - T = ets:new(x, [set, private]), - ok = fck_loop_1(T, 1, MaxInserts, MaxCollSz, OfEachSz), - fck_collect(T, MaxCollSz, OfEachSz, []). - -fck_collect(_T, 1, _OfEachSz, Acc) -> - Acc; -fck_collect(T, CollSz, OfEachSz, Acc) -> - {List, _} = ets:select(T, - [{{'$1','$2'}, [{'==',{length,'$2'},CollSz}], ['$2']}], - OfEachSz), - fck_collect(T, CollSz-1, OfEachSz, List ++ Acc). - - -fck_loop_1(T, Key, 0, MaxCollSz, MaxSzLeft) -> - fck_loop_2(T, Key, MaxCollSz, MaxSzLeft); -fck_loop_1(T, Key, Inserts, MaxCollSz, MaxSzLeft) -> - Hash = internal_hash(Key), - case ets:insert_new(T, {Hash, [Key]}) of - true -> - fck_loop_1(T, Key+1, Inserts-1, MaxCollSz, MaxSzLeft); - false -> - [{Hash, KeyList}] = ets:lookup(T, Hash), - true = ets:insert(T, {Hash, [Key | KeyList]}), - fck_loop_1(T, Key+1, Inserts, MaxCollSz, MaxSzLeft) - end. - -fck_loop_2(_T, _Key, _MaxCollSz, 0) -> - ok; -fck_loop_2(T, Key, MaxCollSz, MaxSzLeft0) -> - Hash = internal_hash(Key), - case ets:lookup(T, Hash) of - [] -> - fck_loop_2(T, Key+1, MaxCollSz, MaxSzLeft0); - [{Hash, KeyList}] -> - true = ets:insert(T, {Hash, [Key | KeyList]}), - MaxSzLeft1 = case length(KeyList)+1 of - MaxCollSz -> - MaxSzLeft0 - 1; - _ -> - MaxSzLeft0 - end, - fck_loop_2(T, Key+1, MaxCollSz, MaxSzLeft1) + NumScheds = erlang:system_info(schedulers_online), + Start = -(1 bsl 32), + End = -Start, + Range = End - Start, + Step = Range div NumScheds, + timer:tc(fun() -> fck_spawn(NumScheds, NumScheds, Start, End, Step, []) end). + +fck_spawn(0, _NumScheds, _Start, _End, _Step, Refs) -> + fck_await(Refs); +fck_spawn(N, NumScheds, Start, End, Step, Refs) -> + Key = Start + (N - 1) * Step, + {_, Ref} = spawn_monitor(fun() -> exit(fck_finder(Start, End, Key)) end), + fck_spawn(N - 1, NumScheds, Start, End, Step, [Ref | Refs]). + +fck_await([Ref | Refs]) -> + receive + {'DOWN', Ref, _, _, [_Initial]} -> + %% Ignore slices where the initial value only collided with itself. + fck_await(Refs); + {'DOWN', Ref, _, _, Collisions} -> + [Collisions | fck_await(Refs)] + end; +fck_await([]) -> + []. + +fck_finder(Start, End, Key) -> + true = Key >= Start, true = Key < End, %Assertion. + fck_finder_1(Start, End, internal_hash(Key)). + +fck_finder_1(Same, Same, _Target) -> + []; +fck_finder_1(Next, End, Target) -> + case internal_hash(Next) =:= Target of + true -> [Next | fck_finder_1(Next + 1, End, Target)]; + false -> fck_finder_1(Next + 1, End, Target) end. - - %% OTP-17700 Bug skipped refc++ of shared magic reference shared_magic_ref(_Config) -> Ref = atomics:new(10, []), |