diff options
Diffstat (limited to 'erts/emulator/beam/jit/x86/instr_float.cpp')
-rw-r--r-- | erts/emulator/beam/jit/x86/instr_float.cpp | 67 |
1 files changed, 42 insertions, 25 deletions
diff --git a/erts/emulator/beam/jit/x86/instr_float.cpp b/erts/emulator/beam/jit/x86/instr_float.cpp index d3e47d49dc..97ea29f940 100644 --- a/erts/emulator/beam/jit/x86/instr_float.cpp +++ b/erts/emulator/beam/jit/x86/instr_float.cpp @@ -35,10 +35,14 @@ void BeamGlobalAssembler::emit_check_float_error() { Label error = a.newLabel(), floatMax = a.newLabel(), floatSignMask = a.newLabel(); - a.movsd(x86::xmm2, x86::xmm0); - a.movsd(x86::xmm1, x86::qword_ptr(floatMax)); - a.andpd(x86::xmm2, x86::xmmword_ptr(floatSignMask)); - a.ucomisd(x86::xmm1, x86::xmm2); + vmovsd(x86::xmm1, x86::qword_ptr(floatMax)); + if (hasCpuFeature(CpuFeatures::X86::kAVX)) { + a.vandpd(x86::xmm2, x86::xmm0, x86::xmmword_ptr(floatSignMask)); + } else { + a.movq(x86::xmm2, x86::xmm0); + a.andpd(x86::xmm2, x86::xmmword_ptr(floatSignMask)); + } + vucomisd(x86::xmm1, x86::xmm2); a.short_().jb(error); a.ret(); @@ -57,16 +61,21 @@ void BeamGlobalAssembler::emit_check_float_error() { a.embedDouble(std::numeric_limits<double>::max()); } -void BeamModuleAssembler::emit_float_instr(uint32_t instId, +void BeamModuleAssembler::emit_float_instr(uint32_t instIdSSE, + uint32_t instIdAVX, const ArgFRegister &LHS, const ArgFRegister &RHS, const ArgFRegister &Dst) { - a.movsd(x86::xmm0, getArgRef(LHS)); - a.movsd(x86::xmm1, getArgRef(RHS)); + vmovsd(x86::xmm0, getArgRef(LHS)); + vmovsd(x86::xmm1, getArgRef(RHS)); - a.emit(instId, x86::xmm0, x86::xmm1); + if (hasCpuFeature(CpuFeatures::X86::kAVX)) { + a.emit(instIdAVX, x86::xmm0, x86::xmm0, x86::xmm1); + } else { + a.emit(instIdSSE, x86::xmm0, x86::xmm1); + } safe_fragment_call(ga->get_check_float_error()); - a.movsd(getArgRef(Dst), x86::xmm0); + vmovsd(getArgRef(Dst), x86::xmm0); } void BeamModuleAssembler::emit_fload(const ArgSource &Src, @@ -76,17 +85,17 @@ void BeamModuleAssembler::emit_fload(const ArgSource &Src, x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1); - a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm))); - a.movsd(getArgRef(Dst), x86::xmm0); + vmovsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm))); + vmovsd(getArgRef(Dst), x86::xmm0); } void BeamModuleAssembler::emit_fstore(const ArgFRegister &Src, const ArgRegister &Dst) { - a.movsd(x86::xmm0, getArgRef(Src)); + vmovsd(x86::xmm0, getArgRef(Src)); /* {thing_word,double} */ a.mov(x86::qword_ptr(HTOP), imm(HEADER_FLONUM)); - a.movsd(x86::qword_ptr(HTOP, sizeof(Eterm)), x86::xmm0); + vmovsd(x86::qword_ptr(HTOP, sizeof(Eterm)), x86::xmm0); a.lea(ARG1, x86::qword_ptr(HTOP, make_float(0))); mov_arg(Dst, ARG1); @@ -122,7 +131,7 @@ void BeamGlobalAssembler::emit_fconv_shared() { a.test(RETd, RETd); a.short_().js(error); - a.movsd(x86::xmm0, TMP_MEM1q); + vmovsd(x86::xmm0, TMP_MEM1q); a.ret(); a.bind(error); @@ -140,8 +149,12 @@ void BeamModuleAssembler::emit_fconv(const ArgSource &Src, comment("simplified fconv since source is always small"); mov_arg(ARG2, Src); a.sar(ARG2, imm(_TAG_IMMED1_SIZE)); - a.cvtsi2sd(x86::xmm0, ARG2); - a.movsd(getArgRef(Dst), x86::xmm0); + if (hasCpuFeature(CpuFeatures::X86::kAVX)) { + a.vcvtsi2sd(x86::xmm0, x86::xmm0, ARG2); + } else { + a.cvtsi2sd(x86::xmm0, ARG2); + } + vmovsd(getArgRef(Dst), x86::xmm0); return; } @@ -153,17 +166,21 @@ void BeamModuleAssembler::emit_fconv(const ArgSource &Src, emit_is_small(not_small, Src, ARG2); a.sar(ARG2, imm(_TAG_IMMED1_SIZE)); - a.cvtsi2sd(x86::xmm0, ARG2); + if (hasCpuFeature(CpuFeatures::X86::kAVX)) { + a.vcvtsi2sd(x86::xmm0, x86::xmm0, ARG2); + } else { + a.cvtsi2sd(x86::xmm0, ARG2); + } a.short_().jmp(next); a.bind(not_small); { - if (masked_types(Src, BEAM_TYPE_FLOAT) == BEAM_TYPE_NONE) { + if (never_one_of<BeamTypeId::Float>(Src)) { comment("skipped float path since source cannot be a float"); } else { /* If the source is always a number, we can skip the box test when * it's not a small. */ - if (always_one_of(Src, BEAM_TYPE_FLOAT | BEAM_TYPE_INTEGER)) { + if (always_one_of<BeamTypeId::Number>(Src)) { comment("skipped box test since source is always a number"); } else { emit_is_boxed(fallback, Src, ARG2, dShort); @@ -172,7 +189,7 @@ void BeamModuleAssembler::emit_fconv(const ArgSource &Src, /* Speculatively load the float value, this is safe since all boxed * terms are at least two words long. */ auto boxed_ptr = emit_ptr_val(ARG1, ARG2); - a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm))); + vmovsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm))); a.cmp(emit_boxed_val(boxed_ptr), imm(HEADER_FLONUM)); a.short_().je(next); @@ -184,31 +201,31 @@ void BeamModuleAssembler::emit_fconv(const ArgSource &Src, } a.bind(next); - a.movsd(getArgRef(Dst), x86::xmm0); + vmovsd(getArgRef(Dst), x86::xmm0); } void BeamModuleAssembler::emit_i_fadd(const ArgFRegister &LHS, const ArgFRegister &RHS, const ArgFRegister &Dst) { - emit_float_instr(x86::Inst::kIdAddpd, LHS, RHS, Dst); + emit_float_instr(x86::Inst::kIdAddpd, x86::Inst::kIdVaddpd, LHS, RHS, Dst); } void BeamModuleAssembler::emit_i_fsub(const ArgFRegister &LHS, const ArgFRegister &RHS, const ArgFRegister &Dst) { - emit_float_instr(x86::Inst::kIdSubpd, LHS, RHS, Dst); + emit_float_instr(x86::Inst::kIdSubpd, x86::Inst::kIdVsubpd, LHS, RHS, Dst); } void BeamModuleAssembler::emit_i_fmul(const ArgFRegister &LHS, const ArgFRegister &RHS, const ArgFRegister &Dst) { - emit_float_instr(x86::Inst::kIdMulpd, LHS, RHS, Dst); + emit_float_instr(x86::Inst::kIdMulpd, x86::Inst::kIdVmulpd, LHS, RHS, Dst); } void BeamModuleAssembler::emit_i_fdiv(const ArgFRegister &LHS, const ArgFRegister &RHS, const ArgFRegister &Dst) { - emit_float_instr(x86::Inst::kIdDivpd, LHS, RHS, Dst); + emit_float_instr(x86::Inst::kIdDivpd, x86::Inst::kIdVdivpd, LHS, RHS, Dst); } void BeamModuleAssembler::emit_i_fnegate(const ArgFRegister &Src, |