/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2020-2023. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#include "beam_asm.hpp"

extern "C"
{
#include "beam_common.h"
#include "code_ix.h"
#include "erl_bif_table.h"
#include "erl_nfunc_sched.h"
#include "bif.h"
#include "erl_msacc.h"
}

/* ARG2 = argument vector, ARG4 (!) = bif function pointer
 *
 * Result is returned in RET, error is indicated by ZF. */
void BeamGlobalAssembler::emit_i_bif_guard_shared() {
    emit_enter_frame();
    emit_enter_runtime();

    a.mov(ARG1, c_p);
    /* ARG2 has been set by caller; ARG3 is never used by guard BIFs. */
    mov_imm(ARG3, 0);
    runtime_call(ARG4, 3);

    emit_leave_runtime();
    emit_leave_frame();

    emit_test_the_non_value(RET);
    a.ret();
}

/* ARG2 = argument vector, ARG4 (!) = bif function pointer
 *
 * Result is returned in RET. */
void BeamGlobalAssembler::emit_i_bif_body_shared() {
    Label error = a.newLabel();

    emit_enter_frame();
    emit_enter_runtime();

    /* Save current BIF and argument vector for the error path. */
    a.mov(TMP_MEM1q, ARG2);
    a.mov(TMP_MEM2q, ARG4);

    a.mov(ARG1, c_p);
    /* ARG2 has been set by caller; ARG3 is never used by guard BIFs. */
    mov_imm(ARG3, 0);
    runtime_call(ARG4, 3);

    emit_test_the_non_value(RET);
    a.short_().je(error);

    emit_leave_runtime();
    emit_leave_frame();

    a.ret();

    a.bind(error);
    {
        /* Copy arguments into x-registers from the argument vector. We don't
         * need to care about the actual arity since all x-registers are
         * clobbered on exceptions. */
        a.mov(ARG2, TMP_MEM1q);
        for (int i = 0; i < 3; i++) {
            a.mov(ARG1, x86::qword_ptr(ARG2, i * sizeof(Eterm)));
            a.mov(getXRef(i), ARG1);
        }

        /* Find the correct MFA from the BIF's function address. */
        a.mov(ARG1, TMP_MEM2q);
        runtime_call<1>(ubif2mfa);

        emit_leave_runtime();
        emit_leave_frame();

        a.mov(ARG4, RET);
        a.jmp(labels[raise_exception]);
    }
}

void BeamModuleAssembler::emit_setup_guard_bif(const std::vector<ArgSource> &args,
                                               const ArgWord &Bif) {
    bool is_contiguous_mem = false;

    ASSERT(args.size() > 0 && args.size() <= 3);

    /* If the guard BIF's arguments are in memory and contiguous, for example
     * `map_get(x0, x1)`, then we can pass the address of the first argument
     * instead of filling in the argument vector. */
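    /* In the contiguous case ARG2 ends up pointing straight at the first
     * argument's x register; otherwise the arguments are copied into the
     * temporary slots starting at TMP_MEM3q and ARG2 points there instead. */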
    is_contiguous_mem = args.size() && args[0].isRegister();
    for (size_t i = 1; i < args.size() && is_contiguous_mem; i++) {
        const ArgSource &curr = args[i], &prev = args[i - 1];

        is_contiguous_mem =
                ArgVal::memory_relation(prev, curr) == ArgVal::consecutive;
    }

    if (is_contiguous_mem) {
        a.lea(ARG2, getArgRef(args[0]));
    } else {
        a.lea(ARG2, TMP_MEM3q);

        for (size_t i = 0; i < args.size(); i++) {
            mov_arg(x86::qword_ptr(ARG2, i * sizeof(Eterm)), args[i]);
        }
    }

    if (logger.file()) {
        ErtsCodeMFA *mfa = ubif2mfa((void *)Bif.get());
        if (mfa) {
            comment("UBIF: %T/%d", mfa->function, mfa->arity);
        }
    }

    mov_arg(ARG4, Bif);
}

void BeamModuleAssembler::emit_i_bif1(const ArgSource &Src1,
                                      const ArgLabel &Fail,
                                      const ArgWord &Bif,
                                      const ArgRegister &Dst) {
    emit_setup_guard_bif({Src1}, Bif);

    if (Fail.get() != 0) {
        safe_fragment_call(ga->get_i_bif_guard_shared());
        a.je(resolve_beam_label(Fail));
    } else {
        safe_fragment_call(ga->get_i_bif_body_shared());
    }

    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_i_bif2(const ArgSource &Src1,
                                      const ArgSource &Src2,
                                      const ArgLabel &Fail,
                                      const ArgWord &Bif,
                                      const ArgRegister &Dst) {
    emit_setup_guard_bif({Src1, Src2}, Bif);

    if (Fail.get() != 0) {
        safe_fragment_call(ga->get_i_bif_guard_shared());
        a.je(resolve_beam_label(Fail));
    } else {
        safe_fragment_call(ga->get_i_bif_body_shared());
    }

    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_i_bif3(const ArgSource &Src1,
                                      const ArgSource &Src2,
                                      const ArgSource &Src3,
                                      const ArgLabel &Fail,
                                      const ArgWord &Bif,
                                      const ArgRegister &Dst) {
    emit_setup_guard_bif({Src1, Src2, Src3}, Bif);

    if (Fail.get() != 0) {
        safe_fragment_call(ga->get_i_bif_guard_shared());
        a.je(resolve_beam_label(Fail));
    } else {
        safe_fragment_call(ga->get_i_bif_body_shared());
    }

    mov_arg(Dst, RET);
}

/*
 * Emit code for guard BIFs that can't fail (e.g. is_list/1). We
 * don't need to test for failure, and even in a body there is no need
 * to align the call targeting the shared fragment.
 */
void BeamModuleAssembler::emit_nofail_bif1(const ArgSource &Src1,
                                           const ArgWord &Bif,
                                           const ArgRegister &Dst) {
    emit_setup_guard_bif({Src1}, Bif);
    safe_fragment_call(ga->get_i_bif_guard_shared());
    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_nofail_bif2(const ArgSource &Src1,
                                           const ArgSource &Src2,
                                           const ArgWord &Bif,
                                           const ArgRegister &Dst) {
    emit_setup_guard_bif({Src1, Src2}, Bif);
    safe_fragment_call(ga->get_i_bif_guard_shared());
    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_i_length_setup(const ArgLabel &Fail,
                                              const ArgWord &Live,
                                              const ArgSource &Src) {
    x86::Mem trap_state;

    /* Store trap state after the currently live registers. There's an extra 3
     * registers beyond the ordinary ones that we're free to use for whatever
     * purpose. */
    ERTS_CT_ASSERT(ERTS_X_REGS_ALLOCATED - MAX_REG >= 3);
    ASSERT(Live.get() <= MAX_REG);
    trap_state = getXRef(Live.get());

    /* Remainder of the list. */
    mov_arg(trap_state, Src);

    /* Accumulated length. */
    a.mov(trap_state.cloneAdjusted(1 * sizeof(Eterm)), imm(make_small(0)));

    /* Original argument. This is only needed for exceptions and can be safely
     * skipped in guards. */
    if (Fail.get() == 0) {
        x86::Mem original_argument;

        original_argument = trap_state.cloneAdjusted(2 * sizeof(Eterm));
        mov_arg(original_argument, Src);
    }
}

/* ARG2 = live registers, ARG3 = entry address
 *
 * Result is returned in RET.
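 *
 * The trap state set up by `i_length_setup` occupies the x registers just
 * above the live ones:
 *
 *     x[Live + 0]  remainder of the list
 *     x[Live + 1]  accumulated length, as a small
 *     x[Live + 2]  original argument (body calls only)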
 */
x86::Mem BeamGlobalAssembler::emit_i_length_common(Label fail, int state_size) {
    Label trap = a.newLabel();
    x86::Mem trap_state;

    ASSERT(state_size >= 2 && state_size <= ERTS_X_REGS_ALLOCATED - MAX_REG);

    /* getXRef(Live) */
    trap_state = getXRef(0);
    trap_state.setIndex(ARG2, 3);

    emit_enter_frame();

    /* Save arguments for error/trapping path. */
    a.mov(TMP_MEM1q, ARG2);
    a.mov(TMP_MEM2q, ARG3);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    a.lea(ARG2, trap_state);
    runtime_call<2>(erts_trapping_length_1);

    emit_leave_runtime();
    emit_leave_frame();

    emit_test_the_non_value(RET);
    a.short_().je(trap);

    a.ret();

    a.bind(trap);
    {
        a.mov(ARG2, TMP_MEM1q);
        a.mov(ARG3, TMP_MEM2q);

        a.cmp(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(TRAP));
        a.jne(fail);

        /* The trap state is stored in the registers above the current live
         * ones, so we add the state size (in words) to keep it alive. */
        a.add(ARG2, imm(state_size));

        /* We'll find our way back through the entry address (ARG3). */
        a.add(x86::rsp, imm(sizeof(UWord)));

        a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), imm(0));
        a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG2);
        a.jmp(labels[context_switch_simplified]);
    }

    return trap_state;
}

/* ARG2 = live registers, ARG3 = entry address
 *
 * Result is returned in RET. */
void BeamGlobalAssembler::emit_i_length_body_shared() {
    Label error = a.newLabel();
    x86::Mem trap_state;

    /* `state_size = 3` to include the original argument. */
    trap_state = emit_i_length_common(error, 3);

    a.bind(error);
    {
        static const ErtsCodeMFA bif_mfa = {am_erlang, am_length, 1};

        /* Move the original argument to x0. It's stored in the third word of
         * the trap state. */
        a.mov(ARG1, trap_state.cloneAdjusted(2 * sizeof(Eterm)));
        a.mov(getXRef(0), ARG1);

        a.mov(ARG4, imm(&bif_mfa));
        a.jmp(labels[raise_exception]);
    }
}

/* ARG2 = live registers, ARG3 = entry address
 *
 * Result is returned in RET, error is indicated by ZF. */
void BeamGlobalAssembler::emit_i_length_guard_shared() {
    Label error = a.newLabel();

    emit_i_length_common(error, 2);

    a.bind(error);
    {
        a.sub(RET, RET);
        a.ret();
    }
}

void BeamModuleAssembler::emit_i_length(const ArgLabel &Fail,
                                        const ArgWord &Live,
                                        const ArgRegister &Dst) {
    Label entry = a.newLabel();

    align_erlang_cp();
    a.bind(entry);

    mov_arg(ARG2, Live);
    a.lea(ARG3, x86::qword_ptr(entry));

    if (Fail.get() != 0) {
        /* The return address is discarded when yielding, so it doesn't need
         * to be aligned. */
        safe_fragment_call(ga->get_i_length_guard_shared());
        a.je(resolve_beam_label(Fail));
    } else {
        safe_fragment_call(ga->get_i_length_body_shared());
    }

    mov_arg(Dst, RET);
}

#if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK)
static Eterm debug_call_light_bif(Process *c_p,
                                  Eterm *reg,
                                  ErtsCodePtr I,
                                  ErtsBifFunc vbf) {
    Eterm result;

    ERTS_UNREQ_PROC_MAIN_LOCK(c_p);
    {
        ERTS_CHK_MBUF_SZ(c_p);
        ASSERT(!ERTS_PROC_IS_EXITING(c_p));
        result = vbf(c_p, reg, I);
        ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(result));
        ERTS_CHK_MBUF_SZ(c_p);

        ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
        ERTS_HOLE_CHECK(c_p);
    }
    PROCESS_MAIN_CHK_LOCKS(c_p);
    ERTS_REQ_PROC_MAIN_LOCK(c_p);

    return result;
}
#endif

/* It is important that the code below is as optimized as possible.
 * When making any changes, check the estone bif_dispatch benchmark
 * to make sure you don't introduce any regressions.
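 *
 * The fast path below falls straight through to the BIF call; tracing and
 * save_calls dispatch through the export entry instead, and running out of
 * reductions yields before the call is made.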
 *
 * ARG3 = entry
 * ARG4 = export entry
 * RET = BIF pointer */
void BeamGlobalAssembler::emit_call_light_bif_shared() {
    x86::Mem entry_mem = TMP_MEM1q, export_mem = TMP_MEM2q,
             mbuf_mem = TMP_MEM3q;

    Label trace = a.newLabel(), yield = a.newLabel();

    emit_enter_frame();

    /* Spill everything we may need on the error and GC paths. */
    a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, mbuf)));
    a.mov(entry_mem, ARG3);
    a.mov(export_mem, ARG4);
    a.mov(mbuf_mem, ARG1);

    /* Check if we should trace this bif call or handle save_calls. Both
     * variants dispatch through the export entry. */
    a.cmp(x86::dword_ptr(ARG4, offsetof(Export, is_bif_traced)), imm(0));
    a.jne(trace);
    a.cmp(active_code_ix, imm(ERTS_SAVE_CALLS_CODE_IX));
    a.je(trace);

    a.dec(FCALLS);
    a.jle(yield);

    {
        Label check_bif_return = a.newLabel(),
              gc_after_bif_call = a.newLabel();

        emit_enter_runtime();

#ifdef ERTS_MSACC_EXTENDED_STATES
        {
            Label skip_msacc = a.newLabel();

            a.cmp(erts_msacc_cache, imm(0));
            a.short_().je(skip_msacc);

            a.mov(TMP_MEM4q, ARG3);

            a.mov(ARG1, erts_msacc_cache);
            a.mov(ARG2,
                  x86::qword_ptr(ARG4, offsetof(Export, info.mfa.module)));
            a.mov(ARG3, RET);
            runtime_call<3>(erts_msacc_set_bif_state);

            a.mov(ARG3, TMP_MEM4q);
            a.bind(skip_msacc);
        }
#endif

        {
            /* Call the BIF proper. ARG3 and RET have been set earlier. */
            a.mov(ARG1, c_p);
            load_x_reg_array(ARG2);

#if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK)
            a.mov(ARG4, RET);
            runtime_call<4>(debug_call_light_bif);
#else
            runtime_call(RET, 3);
#endif
        }

#ifdef ERTS_MSACC_EXTENDED_STATES
        {
            Label skip_msacc = a.newLabel();

            a.cmp(erts_msacc_cache, imm(0));
            a.short_().je(skip_msacc);
            {
                /* Update cache if it was changed in the BIF, stashing the
                 * return value in TMP_MEM4q. */
                a.mov(TMP_MEM4q, RET);
                a.lea(ARG1, erts_msacc_cache);
                runtime_call<1>(erts_msacc_update_cache);
                a.mov(RET, TMP_MEM4q);

                /* Set state to emulator if msacc has been enabled. */
                a.cmp(erts_msacc_cache, imm(0));
                a.short_().je(skip_msacc);

                a.mov(ARG1, erts_msacc_cache);
                a.mov(ARG2, imm(ERTS_MSACC_STATE_EMULATOR));
                a.mov(ARG3, imm(1));
                runtime_call<3>(erts_msacc_set_state_m__);
                a.mov(RET, TMP_MEM4q);
            }
            a.bind(skip_msacc);
        }
#endif

        /* We must update the active code index in case another process has
         * loaded new code, as the result of this BIF may be observable on
         * both ends.
         *
         * It doesn't matter whether the BIF modifies anything; if process
         * A loads new code and calls erlang:monotonic_time/0 soon after,
         * we'd break the illusion of atomic upgrades if process B still
         * ran old code after seeing a later timestamp from its own call to
         * erlang:monotonic_time/0. */
        emit_leave_runtime();

        /* ERTS_IS_GC_DESIRED_INTERNAL */
        {
            /* Test whether GC is forced. */
            a.test(x86::dword_ptr(c_p, offsetof(Process, flags)),
                   imm(F_FORCE_GC | F_DISABLE_GC));
            a.jne(gc_after_bif_call);

            /* Test if binary heap size should trigger GC. */
            a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, bin_vheap_sz)));
            a.cmp(x86::qword_ptr(c_p, offsetof(Process, off_heap.overhead)),
                  ARG1);
            a.ja(gc_after_bif_call);

            /* Test if heap fragment size is larger than remaining heap
             * size. */
            a.mov(ARG2, x86::qword_ptr(c_p, offsetof(Process, mbuf_sz)));
            a.lea(ARG1, x86::qword_ptr(HTOP, ARG2, 3, 0));
            a.cmp(E, ARG1);
            a.jl(gc_after_bif_call);
        }

        /* ! FALL THROUGH ! */
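        /* Distinguishes three outcomes: an ordinary result is moved to x0
         * and we return, THE_NON_VALUE with freason == TRAP yields through
         * a simplified context switch, and anything else raises an
         * exception. */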
        a.bind(check_bif_return);
        {
            Label trap = a.newLabel(), error = a.newLabel();

            emit_test_the_non_value(RET);
            a.short_().je(trap);

            a.mov(getXRef(0), RET);

            emit_leave_frame();
            a.ret();

            a.bind(trap);
            {
                a.cmp(x86::qword_ptr(c_p, offsetof(Process, freason)),
                      imm(TRAP));
                a.short_().jne(error);

#if !defined(NATIVE_ERLANG_STACK)
                a.pop(getCPRef());
#endif

                /* Trap out, our return address is on the Erlang stack.
                 *
                 * The BIF_TRAP macros all set up c_p->arity and c_p->current,
                 * so we can use a simplified context switch. */
                a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, i)));
                a.jmp(labels[context_switch_simplified]);
            }

            a.bind(error);
            {
                a.mov(ARG2, entry_mem);
                a.mov(ARG4, export_mem);
                a.add(ARG4, imm(offsetof(Export, info.mfa)));

#if !defined(NATIVE_ERLANG_STACK)
                /* Discard the continuation pointer as it will never be
                 * used. */
                emit_unwind_frame();
#endif

                /* Overwrite the return address with the entry address to
                 * ensure that only the entry address ends up in the stack
                 * trace. */
                if (erts_frame_layout == ERTS_FRAME_LAYOUT_RA) {
                    a.mov(x86::qword_ptr(E), ARG2);
                } else {
                    ASSERT(erts_frame_layout == ERTS_FRAME_LAYOUT_FP_RA);
                    a.mov(x86::qword_ptr(E, 8), ARG2);
                }

                a.jmp(labels[raise_exception_shared]);
            }
        }

        a.bind(gc_after_bif_call);
        {
            a.mov(ARG2, mbuf_mem);
            a.mov(ARG5, export_mem);
            a.mov(ARG5,
                  x86::qword_ptr(ARG5, offsetof(Export, info.mfa.arity)));

            emit_enter_runtime();

            a.mov(ARG1, c_p);
            a.mov(ARG3, RET);
            load_x_reg_array(ARG4);
            runtime_call<5>(erts_gc_after_bif_call_lhf);

            emit_leave_runtime();

            a.jmp(check_bif_return);
        }
    }

    a.bind(trace);
    {
        /* Tail call the export entry instead of the BIF. If we use the native
         * stack as the Erlang stack, our return address is already on the
         * Erlang stack. Otherwise we will have to move the return address
         * from the native stack to the Erlang stack. */

        emit_leave_frame();

#if !defined(NATIVE_ERLANG_STACK)
        /* The return address must be on the Erlang stack. */
        a.pop(getCPRef());
#endif

        x86::Mem destination = emit_setup_dispatchable_call(ARG4);
        a.jmp(destination);
    }

    a.bind(yield);
    {
        a.mov(ARG2, x86::qword_ptr(ARG4, offsetof(Export, info.mfa.arity)));
        a.lea(ARG4, x86::qword_ptr(ARG4, offsetof(Export, info.mfa)));
        a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG2);
        a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG4);

        /* We'll find our way back through ARG3 (entry address). */
        emit_unwind_frame();
        a.jmp(labels[context_switch_simplified]);
    }
}

void BeamModuleAssembler::emit_call_light_bif(const ArgWord &Bif,
                                              const ArgExport &Exp) {
    Label entry = a.newLabel();

    align_erlang_cp();
    a.bind(entry);

    mov_arg(ARG4, Exp);
    a.mov(RET, imm(Bif.get()));
    a.lea(ARG3, x86::qword_ptr(entry));

    if (logger.file()) {
        BeamFile_ImportEntry *e = &beam->imports.entries[Exp.get()];
        comment("BIF: %T:%T/%d", e->module, e->function, e->arity);
    }

    fragment_call(ga->get_call_light_bif_shared());
}

void BeamModuleAssembler::emit_send() {
    Label entry = a.newLabel();

    /* This is essentially a mirror of call_light_bif; there's no point in
     * specializing send/2 anymore. We do it here because it's far more work
     * to do it in the loader. */
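    /* The sequence below matches emit_call_light_bif() above, with the
     * export entry and BIF pointer for send/2 filled in statically. */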
    align_erlang_cp();
    a.bind(entry);

    a.mov(ARG4, imm(BIF_TRAP_EXPORT(BIF_send_2)));
    a.mov(RET, imm(send_2));
    a.lea(ARG3, x86::qword_ptr(entry));

    fragment_call(ga->get_call_light_bif_shared());
}

void BeamModuleAssembler::emit_nif_start() {
    /* load-time only instruction */
}

void BeamGlobalAssembler::emit_bif_nif_epilogue(void) {
    Label check_trap = a.newLabel(), trap = a.newLabel(), error = a.newLabel();

#ifdef ERTS_MSACC_EXTENDED_STATES
    {
        Label skip_msacc = a.newLabel();

        a.cmp(erts_msacc_cache, imm(0));
        a.short_().je(skip_msacc);

        a.mov(TMP_MEM1q, RET);

        a.mov(ARG1, erts_msacc_cache);
        a.mov(ARG2, imm(ERTS_MSACC_STATE_EMULATOR));
        a.mov(ARG3, imm(1));
        runtime_call<3>(erts_msacc_set_state_m__);

        a.mov(RET, TMP_MEM1q);
        a.bind(skip_msacc);
    }
#endif

    /* Another process may have loaded new code and somehow notified us
     * through this call, so we must update the active code index. */
    emit_leave_runtime();

    emit_test_the_non_value(RET);
    a.short_().je(check_trap);

    comment("Do return and dispatch to it");
    a.mov(getXRef(0), RET);

    emit_leave_frame();
#ifdef NATIVE_ERLANG_STACK
    a.ret();
#else
    a.mov(RET, getCPRef());
    a.mov(getCPRef(), imm(NIL));
    a.jmp(RET);
#endif

    a.bind(check_trap);
    a.cmp(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(TRAP));
    a.jne(error);
    {
        comment("yield");

        comment("test trap to hibernate");
        a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, flags)));
        a.mov(ARG2, ARG1);
        a.and_(ARG2, imm(F_HIBERNATE_SCHED));
        a.short_().je(trap);

        comment("do hibernate trap");
        a.and_(ARG1, imm(~F_HIBERNATE_SCHED));
        a.mov(x86::qword_ptr(c_p, offsetof(Process, flags)), ARG1);
        a.jmp(labels[do_schedule]);
    }

    a.bind(trap);
    {
        comment("do normal trap");

        /* The BIF_TRAP macros all set up c_p->arity and c_p->current, so we
         * can use a simplified context switch. */
        a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, i)));
        a.jmp(labels[context_switch_simplified]);
    }

    a.bind(error);
    {
        a.mov(ARG2, E);

        emit_enter_runtime();

        a.mov(ARG1, c_p);
        runtime_call<2>(erts_printable_return_address);

        emit_leave_runtime();

        a.mov(ARG2, RET);
        a.mov(ARG4, x86::qword_ptr(c_p, offsetof(Process, current)));
        a.jmp(labels[raise_exception_shared]);
    }
}

/* Used by call_bif, dispatch_bif, and export_trampoline.
 *
 * Note that we don't check reductions here as we may have jumped here through
 * interpreted code (e.g. an ErtsNativeFunc or export entry) and it's very
 * tricky to yield back. Reductions are checked in module code instead.
 *
 * ARG2 = BIF MFA
 * ARG3 = I (rip), doesn't need to point past an MFA
 * ARG4 = function to be called */
void BeamGlobalAssembler::emit_call_bif_shared(void) {
    /* "Heavy" BIFs need up-to-date values for `c_p->i`, `c_p->current`, and
     * `c_p->arity`. */

    a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG2);
    /* `call_bif` wants arity in ARG5. */
    a.mov(ARG5, x86::qword_ptr(ARG2, offsetof(ErtsCodeMFA, arity)));
    a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG5);
    a.mov(x86::qword_ptr(c_p, offsetof(Process, i)), ARG3);

    /* The corresponding leave can be found in the epilogue.
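     *
     * `emit_bif_nif_epilogue()` issues the matching `emit_leave_runtime()`
     * before dispatching on the result.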
     */
    emit_enter_runtime();

#ifdef ERTS_MSACC_EXTENDED_STATES
    {
        Label skip_msacc = a.newLabel();

        a.cmp(erts_msacc_cache, imm(0));
        a.short_().je(skip_msacc);

        a.mov(TMP_MEM1q, ARG3);
        a.mov(TMP_MEM2q, ARG5);

        a.mov(ARG1, erts_msacc_cache);
        a.mov(ARG2, x86::qword_ptr(ARG2, offsetof(ErtsCodeMFA, module)));
        a.mov(ARG3, ARG4);
        runtime_call<3>(erts_msacc_set_bif_state);

        a.mov(ARG4, RET);
        a.mov(ARG3, TMP_MEM1q);
        a.mov(ARG5, TMP_MEM2q);
        a.bind(skip_msacc);
    }
#endif

    a.mov(ARG1, c_p);
    load_x_reg_array(ARG2);
    /* ARG3 (I), ARG4 (func), and ARG5 (arity) have already been provided. */
    runtime_call<5>(beam_jit_call_bif);

#ifdef ERTS_MSACC_EXTENDED_STATES
    a.mov(TMP_MEM1q, RET);
    a.lea(ARG1, erts_msacc_cache);
    runtime_call<1>(erts_msacc_update_cache);
    a.mov(RET, TMP_MEM1q);
#endif

    emit_bif_nif_epilogue();
}

void BeamGlobalAssembler::emit_dispatch_bif(void) {
    /* c_p->i points into the trampoline of an ErtsNativeFunc, right after
     * the `info` structure. */
    a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, i)));

    ERTS_CT_ASSERT(offsetof(ErtsNativeFunc, trampoline.call_bif_nif) ==
                   sizeof(ErtsCodeInfo));

    ssize_t mfa_offset = offsetof(ErtsNativeFunc, trampoline.info.mfa) -
                         offsetof(ErtsNativeFunc, trampoline.call_bif_nif);
    a.lea(ARG2, x86::qword_ptr(ARG3, mfa_offset));

    ssize_t dfunc_offset = offsetof(ErtsNativeFunc, trampoline.dfunc) -
                           offsetof(ErtsNativeFunc, trampoline.call_bif_nif);
    a.mov(ARG4, x86::qword_ptr(ARG3, dfunc_offset));

    a.jmp(labels[call_bif_shared]);
}

void BeamModuleAssembler::emit_call_bif(const ArgWord &Func) {
    int mfa_offset = -(int)sizeof(ErtsCodeMFA);
    Label entry = a.newLabel();

    /* This is _always_ the first instruction in a function and replaces the
     * yield test that would otherwise add a frame, so we must add a frame
     * here. */
    emit_enter_frame();

    /* Yield entry point; must be after entering frame. */
    a.bind(entry);
    {
        a.lea(ARG2, x86::qword_ptr(current_label, mfa_offset));
        a.lea(ARG3, x86::qword_ptr(entry));
        mov_arg(ARG4, Func);

        a.jmp(resolve_fragment(ga->get_call_bif_shared()));
    }
}

void BeamModuleAssembler::emit_call_bif_mfa(const ArgAtom &M,
                                            const ArgAtom &F,
                                            const ArgWord &A) {
    BeamInstr func;
    Export *e;

    e = erts_active_export_entry(M.get(), F.get(), A.get());
    ASSERT(e != NULL && e->bif_number != -1);

    comment("HBIF: %T:%T/%d",
            e->info.mfa.module,
            e->info.mfa.function,
            A.get());
    func = (BeamInstr)bif_table[e->bif_number].f;
    emit_call_bif(ArgWord(func));
}

void BeamGlobalAssembler::emit_call_nif_early() {
    /* Fetch and align the return address so we can tell where we came from.
     * It points just after the trampoline word so we'll need to skip that to
     * find our ErtsCodeInfo. */
    a.mov(ARG2, x86::qword_ptr(x86::rsp));
    a.sub(ARG2, imm(sizeof(UWord) + sizeof(ErtsCodeInfo)));

#ifdef DEBUG
    {
        Label next = a.newLabel();

        /* Crash if our return address isn't word-aligned. */
        a.test(ARG2, imm(sizeof(UWord) - 1));
        a.short_().je(next);

        comment("# Return address isn't word-aligned");
        a.ud2();

        a.bind(next);
    }
#endif

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    runtime_call<2>(erts_call_nif_early);

    emit_leave_runtime();

    /* We won't return to the original code. We KNOW that the stack points at
     * a return address. */
    a.add(x86::rsp, imm(8));

    /* Emulate `emit_call_nif`, loading the current (phony) instruction
     * pointer into ARG3. We push a (redundant) frame pointer to match the
     * corresponding `emit_leave_frame` in `call_nif_shared`. */
    emit_enter_frame();
    a.mov(ARG3, RET);
    a.jmp(labels[call_nif_shared]);
}

/* Used by call_nif, call_nif_early, and dispatch_nif.
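 *
 * The NIF's function pointer, module, and dirty function are read from the
 * stub words that `emit_call_nif` embeds after the function prologue.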
 *
 * Note that we don't check reductions here as we may have jumped here through
 * interpreted code (e.g. an ErtsNativeFunc or export entry) and it's very
 * tricky to yield back. Reductions are checked in module code instead.
 *
 * ARG3 = current I, just past the end of an ErtsCodeInfo. */
void BeamGlobalAssembler::emit_call_nif_shared(void) {
    /* The corresponding leave can be found in the epilogue. */
    emit_enter_runtime();

#ifdef ERTS_MSACC_EXTENDED_STATES
    {
        Label skip_msacc = a.newLabel();

        a.cmp(erts_msacc_cache, imm(0));
        a.short_().je(skip_msacc);

        a.mov(TMP_MEM1q, ARG3);

        a.mov(ARG1, erts_msacc_cache);
        a.mov(ARG2, imm(ERTS_MSACC_STATE_NIF));
        a.mov(ARG3, imm(1));
        runtime_call<3>(erts_msacc_set_state_m__);

        a.mov(ARG3, TMP_MEM1q);
        a.bind(skip_msacc);
    }
#endif

    a.mov(ARG1, c_p);
    a.mov(ARG2, ARG3);
    load_x_reg_array(ARG3);
    a.mov(ARG4, x86::qword_ptr(ARG2, 8 + BEAM_ASM_FUNC_PROLOGUE_SIZE));
    a.mov(ARG5, x86::qword_ptr(ARG2, 16 + BEAM_ASM_FUNC_PROLOGUE_SIZE));
    a.mov(ARG6, x86::qword_ptr(ARG2, 24 + BEAM_ASM_FUNC_PROLOGUE_SIZE));
    runtime_call<5>(beam_jit_call_nif);

    emit_bif_nif_epilogue();
}

void BeamGlobalAssembler::emit_dispatch_nif(void) {
    /* c_p->i points into the trampoline of an ErtsNativeFunc, right after
     * the `info` structure.
     *
     * ErtsNativeFunc already follows the NIF call layout, so we don't need
     * to do anything beyond loading the address. */
    ERTS_CT_ASSERT(offsetof(ErtsNativeFunc, trampoline.call_bif_nif) ==
                   sizeof(ErtsCodeInfo));
    a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, i)));
    a.jmp(labels[call_nif_shared]);
}

void BeamGlobalAssembler::emit_call_nif_yield_helper() {
    Label yield = a.newLabel();

    a.dec(FCALLS);
    a.short_().jl(yield);
    a.jmp(labels[call_nif_shared]);

    a.bind(yield);
    {
        int mfa_offset = -(int)sizeof(ErtsCodeMFA);
        int arity_offset = mfa_offset + (int)offsetof(ErtsCodeMFA, arity);

        a.mov(ARG1, x86::qword_ptr(ARG3, arity_offset));
        a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG1);

        a.lea(ARG1, x86::qword_ptr(ARG3, mfa_offset));
        a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG1);

        /* Yield to `dispatch` rather than `entry` to avoid pushing too many
         * frames to the stack. See `emit_call_nif` for details. */
        a.add(ARG3, imm(BEAM_ASM_NFUNC_SIZE + sizeof(UWord[3])));
        a.jmp(labels[context_switch_simplified]);
    }
}

/* WARNING: This stub is memcpy'd, so all code herein must be explicitly
 * position-independent. */
void BeamModuleAssembler::emit_call_nif(const ArgWord &Func,
                                        const ArgWord &NifMod,
                                        const ArgWord &DirtyFunc) {
    Label entry = a.newLabel(), dispatch = a.newLabel();

    /* The start of this function must mimic the layout of ErtsNativeFunc.
     *
     * We jump here on the very first entry, pushing a stack frame if
     * applicable. */
    a.bind(entry);
    {
        emit_enter_frame();
        a.short_().jmp(dispatch); /* call_op */

        a.align(AlignMode::kCode, 8);
        /* ErtsNativeFunc.dfunc */
        a.embedUInt64(Func.get());
        /* ErtsNativeFunc.m */
        a.embedUInt64(NifMod.get());
        /* ErtsNativeFunc.func */
        a.embedUInt64(DirtyFunc.get());
    }

    /* `emit_call_nif_yield_helper` relies on this to compute the address of
     * `dispatch` */
    ASSERT((a.offset() - code.labelOffsetFromBase(current_label)) ==
           BEAM_ASM_NFUNC_SIZE + sizeof(UWord[3]));

    a.bind(dispatch);
    {
        a.lea(ARG3, x86::qword_ptr(current_label));
        pic_jmp(ga->get_call_nif_yield_helper());
    }
}

/* ARG2 = entry address.
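 *
 * Jumped to from the NATIVE_ERLANG_STACK variant of `emit_i_load_nif`
 * below; on yield we resume at the entry address saved in TMP_MEM1q.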
 */
void BeamGlobalAssembler::emit_i_load_nif_shared() {
    static ErtsCodeMFA bif_mfa = {am_erlang, am_load_nif, 2};

    Label yield = a.newLabel(), error = a.newLabel();

    a.mov(TMP_MEM1q, ARG2);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    /* ARG2 has already been set by caller */
    load_x_reg_array(ARG3);
    runtime_call<3>(beam_jit_load_nif);

    emit_leave_runtime();

    a.cmp(RET, imm(RET_NIF_yield));
    a.short_().je(yield);

    /* We entered the frame in module code. */
    emit_leave_frame();

    a.cmp(RET, imm(RET_NIF_success));
    a.short_().jne(error);
    a.ret();

    a.bind(error);
    {
        a.mov(ARG4, imm(&bif_mfa));
        a.jmp(labels[raise_exception]);
    }

    a.bind(yield);
    {
        a.mov(ARG3, TMP_MEM1q);
        a.jmp(labels[context_switch_simplified]);
    }
}

static ErtsCodePtr get_on_load_address(Process *c_p, Eterm module) {
    const Module *modp = erts_get_module(module, erts_active_code_ix());

    if (modp && modp->on_load) {
        const BeamCodeHeader *hdr = (modp->on_load)->code_hdr;

        if (hdr) {
            return erts_codeinfo_to_code(hdr->on_load);
        }
    }

    c_p->freason = BADARG;

    return NULL;
}

/* Implements the internal and undocumented erlang:call_on_load_function/1,
 * which is tricky to implement in the face of frame pointers. */
void BeamModuleAssembler::emit_i_call_on_load_function() {
    static ErtsCodeMFA mfa = {am_erlang, am_call_on_load_function, 1};
    Label next = a.newLabel();

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    a.mov(ARG2, getXRef(0));
    runtime_call<2>(get_on_load_address);

    emit_leave_runtime();

    a.test(RET, RET);
    a.jne(next);

    emit_raise_exception(&mfa);

    a.bind(next);
    erlang_call(RET, ARG1);
}

#ifdef NATIVE_ERLANG_STACK

void BeamModuleAssembler::emit_i_load_nif() {
    Label entry = a.newLabel(), yield = a.newLabel(), next = a.newLabel();

    /* i_load_nif is a rewrite of a call_ext instruction, so we'll body-call
     * ourselves to ensure the stack is consistent with that. This greatly
     * simplifies yielding and error handling. */
    fragment_call(entry);
    a.short_().jmp(next);

    align_erlang_cp();
    a.bind(entry);
    {
        emit_enter_frame();

        a.bind(yield);
        {
            a.lea(ARG2, x86::qword_ptr(yield));
            a.jmp(resolve_fragment(ga->get_i_load_nif_shared()));
        }
    }

    a.bind(next);
}

#else

void BeamModuleAssembler::emit_i_load_nif() {
    static ErtsCodeMFA mfa = {am_erlang, am_load_nif, 2};

    Label entry = a.newLabel(), next = a.newLabel(), schedule = a.newLabel();

    align_erlang_cp();
    a.bind(entry);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    a.lea(ARG2, x86::qword_ptr(current_label));
    load_x_reg_array(ARG3);
    runtime_call<3>(beam_jit_load_nif);

    emit_leave_runtime();

    a.cmp(RET, imm(RET_NIF_yield));
    a.je(schedule);
    a.cmp(RET, imm(RET_NIF_success));
    a.je(next);

    emit_raise_exception(current_label, &mfa);

    a.bind(schedule);
    {
        a.lea(ARG3, x86::qword_ptr(entry));
        a.jmp(resolve_fragment(ga->get_context_switch_simplified()));
    }

    a.bind(next);
}

#endif