diff options
Diffstat (limited to 'erts/emulator/beam/jit/x86/instr_map.cpp')
-rw-r--r-- | erts/emulator/beam/jit/x86/instr_map.cpp | 117 |
1 files changed, 84 insertions, 33 deletions
diff --git a/erts/emulator/beam/jit/x86/instr_map.cpp b/erts/emulator/beam/jit/x86/instr_map.cpp index 4ead792fab..5f89077ba6 100644 --- a/erts/emulator/beam/jit/x86/instr_map.cpp +++ b/erts/emulator/beam/jit/x86/instr_map.cpp @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2020-2022. All Rights Reserved. + * Copyright Ericsson AB 2020-2023. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ using namespace asmjit; extern "C" { #include "erl_map.h" +#include "erl_term_hashing.h" #include "beam_common.h" } @@ -50,6 +51,12 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { a.add(lower, imm(HCONST)); a.add(upper, imm(HCONST)); +#if defined(ERL_INTERNAL_HASH_CRC32C) + a.mov(ARG6d, hash); + a.crc32(hash, lower); + a.add(hash, ARG6d); + a.crc32(hash, upper); +#else using rounds = std::initializer_list<std::tuple<x86::Gp, x86::Gp, x86::Gp, int>>; for (const auto &round : rounds{{lower, upper, hash, 13}, @@ -78,6 +85,7 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { a.xor_(r_a, ARG6d); } +#endif #ifdef DBG_HASHMAP_COLLISION_BONANZA a.mov(TMP_MEM1q, ARG1); @@ -157,7 +165,7 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { emit_ptr_val(node, node); /* Have we found our leaf? */ - a.test(node.r32(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST)); + a.test(node.r8(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST)); a.short_().je(leaf_node); /* Nope, we have to search another node. */ @@ -238,13 +246,13 @@ void BeamGlobalAssembler::emit_flatmap_get_element() { void BeamGlobalAssembler::emit_new_map_shared() { emit_enter_frame(); - emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); + emit_enter_runtime<Update::eReductions | Update::eHeapAlloc>(); a.mov(ARG1, c_p); load_x_reg_array(ARG2); runtime_call<5>(erts_gc_new_map); - emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); + emit_leave_runtime<Update::eReductions | Update::eHeapAlloc>(); emit_leave_frame(); a.ret(); @@ -266,37 +274,80 @@ void BeamModuleAssembler::emit_new_map(const ArgRegister &Dst, mov_arg(Dst, RET); } -void BeamGlobalAssembler::emit_i_new_small_map_lit_shared() { - emit_enter_frame(); - emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); - - a.mov(ARG1, c_p); - load_x_reg_array(ARG2); - runtime_call<5>(erts_gc_new_small_map_lit); - - emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); - emit_leave_frame(); - - a.ret(); -} - void BeamModuleAssembler::emit_i_new_small_map_lit(const ArgRegister &Dst, const ArgWord &Live, const ArgLiteral &Keys, const ArgWord &Size, const Span<ArgVal> &args) { - Label data = embed_vararg_rodata(args, CP_SIZE); - ASSERT(Size.get() == args.size()); - ASSERT(Keys.isLiteral()); - mov_arg(ARG3, Keys); - mov_imm(ARG4, Live.get()); - a.lea(ARG5, x86::qword_ptr(data)); + emit_gc_test(ArgWord(0), + ArgWord(args.size() + MAP_HEADER_FLATMAP_SZ + 1), + Live); - fragment_call(ga->get_i_new_small_map_lit_shared()); + std::vector<ArgVal> data; + data.reserve(args.size() + MAP_HEADER_FLATMAP_SZ + 1); + data.push_back(ArgWord(MAP_HEADER_FLATMAP)); + data.push_back(Size); + data.push_back(Keys); - mov_arg(Dst, RET); + for (auto arg : args) { + data.push_back(arg); + } + + size_t size = data.size(); + unsigned i; + + mov_arg(x86::qword_ptr(HTOP), data[0]); + + /* Starting from 1 instead of 0 gives more opportunities for + * applying the MMX optimizations. */ + for (i = 1; i < size - 1; i += 2) { + x86::Mem dst_ptr0 = x86::qword_ptr(HTOP, i * sizeof(Eterm)); + x86::Mem dst_ptr1 = x86::qword_ptr(HTOP, (i + 1) * sizeof(Eterm)); + auto first = data[i]; + auto second = data[i + 1]; + + switch (ArgVal::memory_relation(first, second)) { + case ArgVal::consecutive: { + x86::Mem src_ptr = getArgRef(first, 16); + + comment("(initializing two elements at once)"); + dst_ptr0.setSize(16); + vmovups(x86::xmm0, src_ptr); + vmovups(dst_ptr0, x86::xmm0); + break; + } + case ArgVal::reverse_consecutive: { + if (!hasCpuFeature(CpuFeatures::X86::kAVX)) { + mov_arg(dst_ptr0, first); + mov_arg(dst_ptr1, second); + } else { + x86::Mem src_ptr = getArgRef(second, 16); + + comment("(initializing with two swapped elements at once)"); + dst_ptr0.setSize(16); + a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */ + a.vmovups(dst_ptr0, x86::xmm0); + } + break; + } + case ArgVal::none: + mov_arg(dst_ptr0, first); + mov_arg(dst_ptr1, second); + break; + } + } + + if (i < size) { + x86::Mem dst_ptr = x86::qword_ptr(HTOP, i * sizeof(Eterm)); + mov_arg(dst_ptr, data[i]); + } + + a.lea(ARG1, x86::byte_ptr(HTOP, TAG_PRIMARY_BOXED)); + a.add(HTOP, imm(size * sizeof(Eterm))); + + mov_arg(Dst, ARG1); } /* ARG1 = map, ARG2 = key @@ -357,7 +408,7 @@ void BeamModuleAssembler::emit_i_get_map_element(const ArgLabel &Fail, mov_arg(ARG1, Src); mov_arg(ARG2, Key); - if (masked_types(Key, BEAM_TYPE_MASK_IMMEDIATE) != BEAM_TYPE_NONE && + if (maybe_one_of<BeamTypeId::MaybeImmediate>(Key) && hasCpuFeature(CpuFeatures::X86::kBMI2)) { safe_fragment_call(ga->get_i_get_map_element_shared()); a.jne(resolve_beam_label(Fail)); @@ -523,13 +574,13 @@ void BeamModuleAssembler::emit_i_get_map_element_hash(const ArgLabel &Fail, /* ARG3 = live registers, ARG4 = update vector size, ARG5 = update vector. */ void BeamGlobalAssembler::emit_update_map_assoc_shared() { emit_enter_frame(); - emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); + emit_enter_runtime<Update::eReductions | Update::eHeapAlloc>(); a.mov(ARG1, c_p); load_x_reg_array(ARG2); runtime_call<5>(erts_gc_update_map_assoc); - emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); + emit_leave_runtime<Update::eReductions | Update::eHeapAlloc>(); emit_leave_frame(); a.ret(); @@ -559,13 +610,13 @@ void BeamModuleAssembler::emit_update_map_assoc(const ArgSource &Src, * Result is returned in RET, error is indicated by ZF. */ void BeamGlobalAssembler::emit_update_map_exact_guard_shared() { emit_enter_frame(); - emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); + emit_enter_runtime<Update::eReductions | Update::eHeapAlloc>(); a.mov(ARG1, c_p); load_x_reg_array(ARG2); runtime_call<5>(erts_gc_update_map_exact); - emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); + emit_leave_runtime<Update::eReductions | Update::eHeapAlloc>(); emit_leave_frame(); emit_test_the_non_value(RET); @@ -579,13 +630,13 @@ void BeamGlobalAssembler::emit_update_map_exact_body_shared() { Label error = a.newLabel(); emit_enter_frame(); - emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); + emit_enter_runtime<Update::eReductions | Update::eHeapAlloc>(); a.mov(ARG1, c_p); load_x_reg_array(ARG2); runtime_call<5>(erts_gc_update_map_exact); - emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); + emit_leave_runtime<Update::eReductions | Update::eHeapAlloc>(); emit_leave_frame(); emit_test_the_non_value(RET); |