/* ** Definitions for target CPU. ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_H #define _LJ_TARGET_H #include "lj_def.h" #include "lj_arch.h" /* -- Registers and spill slots ------------------------------------------- */ /* Register type (uint8_t in ir->r). */ typedef uint32_t Reg; /* The hi-bit is NOT set for an allocated register. This means the value ** can be directly used without masking. The hi-bit is set for a register ** allocation hint or for RID_INIT, RID_SINK or RID_SUNK. */ #define RID_NONE 0x80 #define RID_MASK 0x7f #define RID_INIT (RID_NONE|RID_MASK) #define RID_SINK (RID_INIT-1) #define RID_SUNK (RID_INIT-2) #define ra_noreg(r) ((r) & RID_NONE) #define ra_hasreg(r) (!((r) & RID_NONE)) /* The ra_hashint() macro assumes a previous test for ra_noreg(). */ #define ra_hashint(r) ((r) < RID_SUNK) #define ra_gethint(r) ((Reg)((r) & RID_MASK)) #define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) #define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) /* Spill slot 0 means no spill slot has been allocated. */ #define SPS_NONE 0 #define ra_hasspill(s) ((s) != SPS_NONE) /* Combined register and spill slot (uint16_t in ir->prev). */ typedef uint32_t RegSP; #define REGSP(r, s) ((r) + ((s) << 8)) #define REGSP_HINT(r) ((r)|RID_NONE) #define REGSP_INIT REGSP(RID_INIT, 0) #define regsp_reg(rs) ((rs) & 255) #define regsp_spill(rs) ((rs) >> 8) #define regsp_used(rs) \ (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0)) /* -- Register sets ------------------------------------------------------- */ /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. */ #if LJ_TARGET_PPC || LJ_TARGET_MIPS typedef uint64_t RegSet; #else typedef uint32_t RegSet; #endif #define RID2RSET(r) (((RegSet)1) << (r)) #define RSET_EMPTY ((RegSet)0) #define RSET_RANGE(lo, hi) ((RID2RSET((hi)-(lo))-1) << (lo)) #define rset_test(rs, r) ((int)((rs) >> (r)) & 1) #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) #if LJ_TARGET_PPC || LJ_TARGET_MIPS #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) #else #define rset_picktop(rs) ((Reg)lj_fls(rs)) #define rset_pickbot(rs) ((Reg)lj_ffs(rs)) #endif /* -- Register allocation cost -------------------------------------------- */ /* The register allocation heuristic keeps track of the cost for allocating ** a specific register: ** ** A free register (obviously) has a cost of 0 and a 1-bit in the free mask. ** ** An already allocated register has the (non-zero) IR reference in the lowest ** bits and the result of a blended cost-model in the higher bits. ** ** The allocator first checks the free mask for a hit. Otherwise an (unrolled) ** linear search for the minimum cost is used. The search doesn't need to ** keep track of the position of the minimum, which makes it very fast. ** The lowest bits of the minimum cost show the desired IR reference whose ** register is the one to evict. ** ** Without the cost-model this degenerates to the standard heuristics for ** (reverse) linear-scan register allocation. Since code generation is done ** in reverse, a live interval extends from the last use to the first def. ** For an SSA IR the IR reference is the first (and only) def and thus ** trivially marks the end of the interval. The LSRA heuristics says to pick ** the register whose live interval has the furthest extent, i.e. the lowest ** IR reference in our case. ** ** A cost-model should take into account other factors, like spill-cost and ** restore- or rematerialization-cost, which depend on the kind of instruction. ** E.g. constants have zero spill costs, variant instructions have higher ** costs than invariants and PHIs should preferably never be spilled. ** ** Here's a first cut at simple, but effective blended cost-model for R-LSRA: ** - Due to careful design of the IR, constants already have lower IR ** references than invariants and invariants have lower IR references ** than variants. ** - The cost in the upper 16 bits is the sum of the IR reference and a ** weighted score. The score currently only takes into account whether ** the IRT_ISPHI bit is set in the instruction type. ** - The PHI weight is the minimum distance (in IR instructions) a PHI ** reference has to be further apart from a non-PHI reference to be spilled. ** - It should be a power of two (for speed) and must be between 2 and 32768. ** Good values for the PHI weight seem to be between 40 and 150. ** - Further study is required. */ #define REGCOST_PHI_WEIGHT 64 /* Cost for allocating a specific register. */ typedef uint32_t RegCost; /* Note: assumes 16 bit IRRef1. */ #define REGCOST(cost, ref) ((RegCost)(ref) + ((RegCost)(cost) << 16)) #define regcost_ref(rc) ((IRRef1)(rc)) #define REGCOST_T(t) \ ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI)) #define REGCOST_REF_T(ref, t) (REGCOST((ref), (ref)) + REGCOST_T((t))) /* -- Target-specific definitions ----------------------------------------- */ #if LJ_TARGET_X86ORX64 #include "lj_target_x86.h" #elif LJ_TARGET_ARM #include "lj_target_arm.h" #elif LJ_TARGET_PPC #include "lj_target_ppc.h" #elif LJ_TARGET_MIPS #include "lj_target_mips.h" #else #error "Missing include for target CPU" #endif #ifdef EXITSTUBS_PER_GROUP /* Return the address of an exit stub. */ static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) { lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL); return (char *)group[exitno / EXITSTUBS_PER_GROUP] + EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); } /* Avoid dependence on lj_jit.h if only including lj_target.h. */ #define exitstub_addr(J, exitno) \ ((MCode *)exitstub_addr_((char **)((J)->exitstubgroup), (exitno))) #endif #endif