diff options
Diffstat (limited to 'js/src/nanojit/LIR.h')
-rw-r--r-- | js/src/nanojit/LIR.h | 2443
1 file changed, 2443 insertions, 0 deletions
diff --git a/js/src/nanojit/LIR.h b/js/src/nanojit/LIR.h new file mode 100644 index 0000000..4d6f03f --- /dev/null +++ b/js/src/nanojit/LIR.h @@ -0,0 +1,2443 @@ +/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */ +/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is [Open Source Virtual Machine]. + * + * The Initial Developer of the Original Code is + * Adobe System Incorporated. + * Portions created by the Initial Developer are Copyright (C) 2004-2007 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Adobe AS3 Team + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef __nanojit_LIR__ +#define __nanojit_LIR__ + +namespace nanojit +{ + enum LOpcode +#if defined(_MSC_VER) && _MSC_VER >= 1400 +#pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum + : unsigned +#endif + { +#define OP___(op, number, repKind, retType, isCse) \ + LIR_##op = (number), +#include "LIRopcode.tbl" + LIR_sentinel, +#undef OP___ + +#ifdef NANOJIT_64BIT +# define PTR_SIZE(a,b) b +#else +# define PTR_SIZE(a,b) a +#endif + + // Pointer-sized synonyms. 
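+        // (For example, LIR_ldp below resolves via PTR_SIZE to LIR_ldi on
+        // 32-bit builds and to LIR_ldq on 64-bit builds.)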
+ + LIR_paramp = PTR_SIZE(LIR_parami, LIR_paramq), + + LIR_retp = PTR_SIZE(LIR_reti, LIR_retq), + + LIR_livep = PTR_SIZE(LIR_livei, LIR_liveq), + + LIR_ldp = PTR_SIZE(LIR_ldi, LIR_ldq), + + LIR_stp = PTR_SIZE(LIR_sti, LIR_stq), + + LIR_callp = PTR_SIZE(LIR_calli, LIR_callq), + + LIR_eqp = PTR_SIZE(LIR_eqi, LIR_eqq), + LIR_ltp = PTR_SIZE(LIR_lti, LIR_ltq), + LIR_gtp = PTR_SIZE(LIR_gti, LIR_gtq), + LIR_lep = PTR_SIZE(LIR_lei, LIR_leq), + LIR_gep = PTR_SIZE(LIR_gei, LIR_geq), + LIR_ltup = PTR_SIZE(LIR_ltui, LIR_ltuq), + LIR_gtup = PTR_SIZE(LIR_gtui, LIR_gtuq), + LIR_leup = PTR_SIZE(LIR_leui, LIR_leuq), + LIR_geup = PTR_SIZE(LIR_geui, LIR_geuq), + + LIR_addp = PTR_SIZE(LIR_addi, LIR_addq), + LIR_subp = PTR_SIZE(LIR_subi, LIR_subq), + LIR_addjovp = PTR_SIZE(LIR_addjovi, LIR_addjovq), + + LIR_andp = PTR_SIZE(LIR_andi, LIR_andq), + LIR_orp = PTR_SIZE(LIR_ori, LIR_orq), + LIR_xorp = PTR_SIZE(LIR_xori, LIR_xorq), + + LIR_lshp = PTR_SIZE(LIR_lshi, LIR_lshq), + LIR_rshp = PTR_SIZE(LIR_rshi, LIR_rshq), + LIR_rshup = PTR_SIZE(LIR_rshui, LIR_rshuq), + + LIR_cmovp = PTR_SIZE(LIR_cmovi, LIR_cmovq) + }; + + // 32-bit integer comparisons must be contiguous, as must 64-bit integer + // comparisons and 64-bit float comparisons. + NanoStaticAssert(LIR_eqi + 1 == LIR_lti && + LIR_eqi + 2 == LIR_gti && + LIR_eqi + 3 == LIR_lei && + LIR_eqi + 4 == LIR_gei && + LIR_eqi + 5 == LIR_ltui && + LIR_eqi + 6 == LIR_gtui && + LIR_eqi + 7 == LIR_leui && + LIR_eqi + 8 == LIR_geui); +#ifdef NANOJIT_64BIT + NanoStaticAssert(LIR_eqq + 1 == LIR_ltq && + LIR_eqq + 2 == LIR_gtq && + LIR_eqq + 3 == LIR_leq && + LIR_eqq + 4 == LIR_geq && + LIR_eqq + 5 == LIR_ltuq && + LIR_eqq + 6 == LIR_gtuq && + LIR_eqq + 7 == LIR_leuq && + LIR_eqq + 8 == LIR_geuq); +#endif + NanoStaticAssert(LIR_eqd + 1 == LIR_ltd && + LIR_eqd + 2 == LIR_gtd && + LIR_eqd + 3 == LIR_led && + LIR_eqd + 4 == LIR_ged); + + // Various opcodes must be changeable to their opposite with op^1 + // (although we use invertXyz() when possible, ie. outside static + // assertions). + NanoStaticAssert((LIR_jt^1) == LIR_jf && (LIR_jf^1) == LIR_jt); + + NanoStaticAssert((LIR_xt^1) == LIR_xf && (LIR_xf^1) == LIR_xt); + + NanoStaticAssert((LIR_lti^1) == LIR_gti && (LIR_gti^1) == LIR_lti); + NanoStaticAssert((LIR_lei^1) == LIR_gei && (LIR_gei^1) == LIR_lei); + NanoStaticAssert((LIR_ltui^1) == LIR_gtui && (LIR_gtui^1) == LIR_ltui); + NanoStaticAssert((LIR_leui^1) == LIR_geui && (LIR_geui^1) == LIR_leui); + +#ifdef NANOJIT_64BIT + NanoStaticAssert((LIR_ltq^1) == LIR_gtq && (LIR_gtq^1) == LIR_ltq); + NanoStaticAssert((LIR_leq^1) == LIR_geq && (LIR_geq^1) == LIR_leq); + NanoStaticAssert((LIR_ltuq^1) == LIR_gtuq && (LIR_gtuq^1) == LIR_ltuq); + NanoStaticAssert((LIR_leuq^1) == LIR_geuq && (LIR_geuq^1) == LIR_leuq); +#endif + + NanoStaticAssert((LIR_ltd^1) == LIR_gtd && (LIR_gtd^1) == LIR_ltd); + NanoStaticAssert((LIR_led^1) == LIR_ged && (LIR_ged^1) == LIR_led); + + + struct GuardRecord; + struct SideExit; + + enum AbiKind { + ABI_FASTCALL, + ABI_THISCALL, + ABI_STDCALL, + ABI_CDECL + }; + + // This is much the same as LTy, but we need to distinguish signed and + // unsigned 32-bit ints so that they will be extended to 64-bits correctly + // on 64-bit platforms. + // + // All values must fit into three bits. See CallInfo for details. 
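+    // For example (illustrative, not in the original source): with three
+    // bits per entry, a return type plus eight argument types fill exactly
+    // the 27 bits of CallInfo::_typesig, and a signature such as
+    // 'double f(int32_t, double)' is encoded, low bits first, as
+    // ARGTYPE_D | ARGTYPE_D<<3 | ARGTYPE_I<<6 by CallInfo::typeSig2.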
+ enum ArgType { + ARGTYPE_V = 0, // void + ARGTYPE_I = 1, // int32_t + ARGTYPE_UI = 2, // uint32_t +#ifdef NANOJIT_64BIT + ARGTYPE_Q = 3, // uint64_t +#endif + ARGTYPE_D = 4, // double + + // aliases + ARGTYPE_P = PTR_SIZE(ARGTYPE_I, ARGTYPE_Q), // pointer + ARGTYPE_B = ARGTYPE_I // bool + }; + + enum IndirectCall { + CALL_INDIRECT = 0 + }; + + //----------------------------------------------------------------------- + // Aliasing + // -------- + // *Aliasing* occurs when a single memory location can be accessed through + // multiple names. For example, consider this code: + // + // ld a[0] + // sti b[0] + // ld a[0] + // + // In general, it's possible that a[0] and b[0] may refer to the same + // memory location. This means, for example, that you cannot safely + // perform CSE on the two loads. However, if you know that 'a' cannot be + // an alias of 'b' (ie. the two loads do not alias with the store) then + // you can safely perform CSE. + // + // Access regions + // -------------- + // Doing alias analysis precisely is difficult. But it turns out that + // keeping track of aliasing at a coarse level is enough to help with many + // optimisations. So we conceptually divide the memory that is accessible + // from LIR into a small number of "access regions" (aka. "Acc"). An + // access region may be non-contiguous. No two access regions can + // overlap. The union of all access regions covers all memory accessible + // from LIR. + // + // In general a (static) load or store may be executed more than once, and + // thus may access multiple regions; however, in practice almost all + // loads and stores will obviously access only a single region. A + // function called from LIR may load and/or store multiple access regions + // (even if executed only once). + // + // If two loads/stores/calls are known to not access the same region(s), + // then they do not alias. + // + // All regions are defined by the embedding. It makes sense to add new + // embedding-specific access regions when doing so will help with one or + // more optimisations. + // + // Access region sets and instruction markings + // ------------------------------------------- + // Each load/store is marked with an "access region set" (aka. "AccSet"), + // which is a set of one or more access regions. This indicates which + // parts of LIR-accessible memory the load/store may touch. + // + // Each function called from LIR is also marked with an access region set + // for memory stored to by the function. (We could also have a marking + // for memory loads done by the function, but there's no need at the + // moment.) These markings apply to the function itself, not the call + // site, ie. they're not context-sensitive. + // + // These load/store/call markings MUST BE ACCURATE -- if not then invalid + // optimisations might occur that change the meaning of the code. + // However, they can safely be imprecise (ie. conservative), ie. a + // load/store/call can be marked with an access region set that is a + // superset of the actual access region set. Such imprecision is safe but + // may reduce optimisation opportunities. + // + // Optimisations that use access region info + // ----------------------------------------- + // Currently only CseFilter uses this, and only for determining whether + // loads can be CSE'd. Note that CseFilter treats loads that are marked + // with a single access region precisely, but all loads marked with + // multiple access regions get lumped together. 
+    // So if you can't mark a load with a single access region, you might
+    // as well use ACCSET_LOAD_ANY.
+    //-----------------------------------------------------------------------
+
+    // An access region set is represented as a bitset.  Using a uint32_t
+    // restricts us to at most 32 alias regions for the moment.  This could
+    // be expanded to a uint64_t easily if needed.
+    typedef uint32_t AccSet;
+    static const int NUM_ACCS = sizeof(AccSet) * 8;
+
+    // Some common (non-singleton) access region sets.  ACCSET_NONE does not
+    // make sense for loads or stores (which must access at least one
+    // region); it only makes sense for calls.
+    //
+    static const AccSet ACCSET_NONE      = 0x0;
+    static const AccSet ACCSET_ALL       = 0xffffffff;
+    static const AccSet ACCSET_LOAD_ANY  = ACCSET_ALL;      // synonym
+    static const AccSet ACCSET_STORE_ANY = ACCSET_ALL;      // synonym
+
+    inline bool isSingletonAccSet(AccSet accSet) {
+        // This is a neat way of testing if a value has only one bit set.
+        return (accSet & (accSet - 1)) == 0;
+    }
+
+    // Full AccSets don't fit into load and store instructions.  But
+    // load/store AccSets almost always contain a single access region.  We
+    // take advantage of this to create a compressed AccSet, MiniAccSet, that
+    // does fit.
+    //
+    // The 32 single-region AccSets get compressed into a number in the range
+    // 0..31 (according to the position of the set bit), and all other
+    // (multi-region) AccSets get converted into MINI_ACCSET_MULTIPLE.  So
+    // the representation is lossy in the latter case, but that case is rare
+    // for loads/stores.  We use a full AccSet for the storeAccSets of calls,
+    // for which multi-region AccSets are common.
+    //
+    // We wrap the uint8_t inside a struct to avoid the possibility of subtle
+    // bugs caused by mixing up AccSet and MiniAccSet, which is easy to do.
+    // However, the struct gets padded inside LInsLd in an inconsistent way
+    // on Windows, so we actually store a MiniAccSetVal inside LInsLd.  Sigh.
+    // But we use MiniAccSet everywhere else.
+    //
+    typedef uint8_t MiniAccSetVal;
+    struct MiniAccSet { MiniAccSetVal val; };
+    static const MiniAccSet MINI_ACCSET_MULTIPLE = { 99 };
+
+    static MiniAccSet compressAccSet(AccSet accSet) {
+        if (isSingletonAccSet(accSet)) {
+            MiniAccSet ret = { uint8_t(msbSet32(accSet)) };
+            return ret;
+        }
+
+        // If we got here, it must be a multi-region AccSet.
+        return MINI_ACCSET_MULTIPLE;
+    }
+
+    static AccSet decompressMiniAccSet(MiniAccSet miniAccSet) {
+        return (miniAccSet.val == MINI_ACCSET_MULTIPLE.val) ? ACCSET_ALL
+                                                            : (1 << miniAccSet.val);
+    }
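+
+    // A minimal sketch of the round trip (illustrative, not in the original
+    // source), assuming two hypothetical regions at bits 0 and 3: a
+    // singleton AccSet survives compression exactly, while a multi-region
+    // AccSet decompresses conservatively to ACCSET_ALL.
+    inline bool exampleMiniAccSetRoundTrip() {
+        AccSet one = 1 << 3;                // singleton: one region
+        AccSet two = (1 << 0) | (1 << 3);   // multi-region
+        return decompressMiniAccSet(compressAccSet(one)) == one &&
+               decompressMiniAccSet(compressAccSet(two)) == ACCSET_ALL;
+    }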
+
+    // The LoadQual affects how a load can be optimised:
+    //
+    // - CONST: These loads are guaranteed to always return the same value
+    //   during a single execution of a fragment (but the value is allowed to
+    //   change between executions of the fragment).  This means that the
+    //   location is never stored to by the LIR, and is never modified by an
+    //   external entity while the fragment is running.
+    //
+    // - NORMAL: These loads may be stored to by the LIR, but are never
+    //   modified by an external entity while the fragment is running.
+    //
+    // - VOLATILE: These loads may be stored to by the LIR, and may be
+    //   modified by an external entity while the fragment is running.
+    //
+    // This gives a lattice with the ordering:  CONST < NORMAL < VOLATILE.
+    // As usual, it's safe to mark a load with a value higher (less precise)
+    // than actual, but it may result in fewer optimisations occurring.
+    //
+    // Generally CONST loads are highly amenable to optimisation (eg. CSE),
+    // VOLATILE loads are entirely unoptimisable, and NORMAL loads are in
+    // between and require some alias analysis to optimise.
+    //
+    // Note that CONST has a stronger meaning than "const" in C and C++;  in
+    // C and C++ a "const" variable may be modified by an external entity,
+    // such as hardware.  Hence "const volatile" makes sense in C and C++,
+    // but CONST+VOLATILE doesn't make sense in LIR.
+    //
+    // Note also that a 2-bit bitfield in LInsLd is used to hold LoadQual
+    // values, so you can add one more value without expanding it.
+    //
+    enum LoadQual {
+        LOAD_CONST = 0,
+        LOAD_NORMAL = 1,
+        LOAD_VOLATILE = 2
+    };
+
+    struct CallInfo
+    {
+    private:
+        // In CallInfo::_typesig, each entry is three bits.
+        static const int TYPESIG_FIELDSZB = 3;
+        static const int TYPESIG_FIELDMASK = 7;
+
+    public:
+        uintptr_t   _address;
+        uint32_t    _typesig:27;    // 9 3-bit fields indicating arg type, by ARGTYPE above (including ret type): a1 a2 .. a8 ret
+        AbiKind     _abi:3;
+        uint32_t    _isPure:1;      // _isPure=1 means no side-effects, result only depends on args
+        AccSet      _storeAccSet;   // access regions stored by the function
+        verbose_only ( const char* _name; )
+
+        // The following encode 'r func()' through to 'r func(a1, a2, a3, a4, a5, a6, a7, a8)'.
+        static inline uint32_t typeSig0(ArgType r) {
+            return r;
+        }
+        static inline uint32_t typeSig1(ArgType r, ArgType a1) {
+            return a1 << TYPESIG_FIELDSZB*1 | typeSig0(r);
+        }
+        static inline uint32_t typeSig2(ArgType r, ArgType a1, ArgType a2) {
+            return a1 << TYPESIG_FIELDSZB*2 | typeSig1(r, a2);
+        }
+        static inline uint32_t typeSig3(ArgType r, ArgType a1, ArgType a2, ArgType a3) {
+            return a1 << TYPESIG_FIELDSZB*3 | typeSig2(r, a2, a3);
+        }
+        static inline uint32_t typeSig4(ArgType r, ArgType a1, ArgType a2, ArgType a3, ArgType a4) {
+            return a1 << TYPESIG_FIELDSZB*4 | typeSig3(r, a2, a3, a4);
+        }
+        static inline uint32_t typeSig5(ArgType r, ArgType a1, ArgType a2, ArgType a3,
+                                        ArgType a4, ArgType a5) {
+            return a1 << TYPESIG_FIELDSZB*5 | typeSig4(r, a2, a3, a4, a5);
+        }
+        static inline uint32_t typeSig6(ArgType r, ArgType a1, ArgType a2, ArgType a3,
+                                        ArgType a4, ArgType a5, ArgType a6) {
+            return a1 << TYPESIG_FIELDSZB*6 | typeSig5(r, a2, a3, a4, a5, a6);
+        }
+        static inline uint32_t typeSig7(ArgType r, ArgType a1, ArgType a2, ArgType a3,
+                                        ArgType a4, ArgType a5, ArgType a6, ArgType a7) {
+            return a1 << TYPESIG_FIELDSZB*7 | typeSig6(r, a2, a3, a4, a5, a6, a7);
+        }
+        static inline uint32_t typeSig8(ArgType r, ArgType a1, ArgType a2, ArgType a3, ArgType a4,
+                                        ArgType a5, ArgType a6, ArgType a7, ArgType a8) {
+            return a1 << TYPESIG_FIELDSZB*8 | typeSig7(r, a2, a3, a4, a5, a6, a7, a8);
+        }
+        // Encode 'r func(a1, ..., aN)'
+        static inline uint32_t typeSigN(ArgType r, int N, ArgType a[]) {
+            uint32_t typesig = r;
+            for (int i = 0; i < N; i++) {
+                typesig |= a[i] << TYPESIG_FIELDSZB*(N-i);
+            }
+            return typesig;
+        }
+
+        uint32_t count_args() const;
+        uint32_t count_int32_args() const;
+        // Nb: uses right-to-left order, eg. sizes[0] is the size of the right-most arg.
+        // XXX: See bug 525815 for fixing this.
+        uint32_t getArgTypes(ArgType* types) const;
+
+        inline ArgType returnType() const {
+            return ArgType(_typesig & TYPESIG_FIELDMASK);
+        }
+
+        inline bool isIndirect() const {
+            return _address < 256;
+        }
+    };
+
+    /*
+     * Record for extra data used to compile switches as jump tables.
+ */ + struct SwitchInfo + { + NIns** table; // Jump table; a jump address is NIns* + uint32_t count; // Number of table entries + // Index value at last execution of the switch. The index value + // is the offset into the jump table. Thus it is computed as + // (switch expression) - (lowest case value). + uint32_t index; + }; + + // Array holding the 'isCse' field from LIRopcode.tbl. + extern const int8_t isCses[]; // cannot be uint8_t, some values are negative + + inline bool isCseOpcode(LOpcode op) { + NanoAssert(isCses[op] != -1); // see LIRopcode.tbl to understand this + return isCses[op] == 1; + } + inline bool isLiveOpcode(LOpcode op) { + return +#if defined NANOJIT_64BIT + op == LIR_liveq || +#endif + op == LIR_livei || op == LIR_lived; + } + inline bool isRetOpcode(LOpcode op) { + return +#if defined NANOJIT_64BIT + op == LIR_retq || +#endif + op == LIR_reti || op == LIR_retd; + } + inline bool isCmovOpcode(LOpcode op) { + return +#if defined NANOJIT_64BIT + op == LIR_cmovq || +#endif + op == LIR_cmovi || + op == LIR_cmovd; + } + inline bool isCmpIOpcode(LOpcode op) { + return LIR_eqi <= op && op <= LIR_geui; + } + inline bool isCmpSIOpcode(LOpcode op) { + return LIR_eqi <= op && op <= LIR_gei; + } + inline bool isCmpUIOpcode(LOpcode op) { + return LIR_eqi == op || (LIR_ltui <= op && op <= LIR_geui); + } +#ifdef NANOJIT_64BIT + inline bool isCmpQOpcode(LOpcode op) { + return LIR_eqq <= op && op <= LIR_geuq; + } + inline bool isCmpSQOpcode(LOpcode op) { + return LIR_eqq <= op && op <= LIR_geq; + } + inline bool isCmpUQOpcode(LOpcode op) { + return LIR_eqq == op || (LIR_ltuq <= op && op <= LIR_geuq); + } +#endif + inline bool isCmpDOpcode(LOpcode op) { + return LIR_eqd <= op && op <= LIR_ged; + } + inline bool isCmpOpcode(LOpcode op) { + return isCmpIOpcode(op) || +#if defined NANOJIT_64BIT + isCmpQOpcode(op) || +#endif + isCmpDOpcode(op); + } + + inline LOpcode invertCondJmpOpcode(LOpcode op) { + NanoAssert(op == LIR_jt || op == LIR_jf); + return LOpcode(op ^ 1); + } + inline LOpcode invertCondGuardOpcode(LOpcode op) { + NanoAssert(op == LIR_xt || op == LIR_xf); + return LOpcode(op ^ 1); + } + inline LOpcode invertCmpOpcode(LOpcode op) { + NanoAssert(isCmpOpcode(op)); + return LOpcode(op ^ 1); + } + + inline LOpcode getCallOpcode(const CallInfo* ci) { + LOpcode op = LIR_callp; + switch (ci->returnType()) { + case ARGTYPE_V: op = LIR_callv; break; + case ARGTYPE_I: + case ARGTYPE_UI: op = LIR_calli; break; +#ifdef NANOJIT_64BIT + case ARGTYPE_Q: op = LIR_callq; break; +#endif + case ARGTYPE_D: op = LIR_calld; break; + default: NanoAssert(0); break; + } + return op; + } + + LOpcode arithOpcodeD2I(LOpcode op); +#ifdef NANOJIT_64BIT + LOpcode cmpOpcodeI2Q(LOpcode op); +#endif + LOpcode cmpOpcodeD2I(LOpcode op); + LOpcode cmpOpcodeD2UI(LOpcode op); + + // Array holding the 'repKind' field from LIRopcode.tbl. + extern const uint8_t repKinds[]; + + enum LTy { + LTy_V, // void: no value/no type + LTy_I, // int: 32-bit integer +#ifdef NANOJIT_64BIT + LTy_Q, // quad: 64-bit integer +#endif + LTy_D, // double: 64-bit float + + LTy_P = PTR_SIZE(LTy_I, LTy_Q) // word-sized integer + }; + + // Array holding the 'retType' field from LIRopcode.tbl. + extern const LTy retTypes[]; + + inline RegisterMask rmask(Register r) + { + return RegisterMask(1) << REGNUM(r); + } + + //----------------------------------------------------------------------- + // Low-level instructions. This is a bit complicated, because we have a + // variable-width representation to minimise space usage. 
+    //
+    // - Instruction size is always an integral multiple of word size.
+    //
+    // - Every instruction has at least one word, holding the opcode and the
+    //   reservation info ("SharedFields").  That word is in class LIns.
+    //
+    // - Beyond that, most instructions have 1, 2 or 3 extra words.  These
+    //   extra words are in classes LInsOp1, LInsOp2, etc (collectively called
+    //   "LInsXYZ" in what follows).  Each LInsXYZ class also contains an LIns,
+    //   accessible by the 'ins' member, which holds the LIns data.
+    //
+    // - LIR is written forward, but read backwards.  When reading backwards,
+    //   in order to find the opcode, it must be in a predictable place, one
+    //   that isn't affected by the instruction's width.  Therefore, the LIns
+    //   word (which contains the opcode) is always the *last* word in an
+    //   instruction.
+    //
+    // - Each instruction is created by casting pre-allocated bytes from a
+    //   LirBuffer to the LInsXYZ type.  Therefore there are no constructors
+    //   for LIns or LInsXYZ.
+    //
+    // - The standard handle for an instruction is a LIns*.  This actually
+    //   points to the LIns word, ie. to the final word in the instruction.
+    //   This is a bit odd, but it allows the instruction's opcode to be
+    //   easily accessed.  Once you've looked at the opcode and know what kind
+    //   of instruction it is, if you want to access any of the other words,
+    //   you need to use toLInsXYZ(), which takes the LIns* and gives you an
+    //   LInsXYZ*, ie. the pointer to the actual start of the instruction's
+    //   bytes.  From there you can access the instruction-specific extra
+    //   words.
+    //
+    // - However, from outside class LIns, LInsXYZ isn't visible, nor is
+    //   toLInsXYZ() -- from outside LIns, all LIR instructions are handled
+    //   via LIns pointers and get/set methods are used for all LIns/LInsXYZ
+    //   accesses.  In fact, all data members in LInsXYZ are private and can
+    //   only be accessed by LIns, which is a friend class.  The only thing
+    //   anyone outside LIns can do with a LInsXYZ is call getLIns().
+    //
+    // - An example Op2 instruction and the likely pointers to it (each line
+    //   represents a word, and pointers to a line point to the start of the
+    //   word on that line):
+    //
+    //      [ oprnd_2          <-- LInsOp2* insOp2 == toLInsOp2(ins)
+    //        oprnd_1
+    //        opcode + resv ]  <-- LIns* ins
+    //
+    // - LIR_skip instructions are used to link code chunks.  If the first
+    //   instruction on a chunk isn't a LIR_start, it will be a skip, and the
+    //   skip's operand will point to the last LIns on the preceding chunk.
+    //   LInsSk has the same layout as LInsOp1, but we represent it as a
+    //   different class because there are some places where we treat
+    //   skips specially and so having it separate seems like a good idea.
+    //
+    // - Various things about the size and layout of LIns and LInsXYZ are
+    //   statically checked in staticSanityCheck().  In particular, this is
+    //   worthwhile because there's nothing that guarantees that all the
+    //   LInsXYZ classes have a size that is a multiple of word size (but in
+    //   practice all sane compilers use a layout that results in this).  We
+    //   also check that every LInsXYZ is word-aligned in
+    //   LirBuffer::makeRoom();  this seems sensible to avoid potential
+    //   slowdowns due to misalignment.  It relies on chunks themselves being
+    //   word-aligned, which is extremely likely.
+    //
+    // - There is an enum, LInsRepKind, with one member for each of the
+    //   LInsXYZ kinds.  Each opcode is categorised with its LInsRepKind value
+    //   in LIRopcode.tbl, and this is used in various places.
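+    //
+    // - A worked example of the pointer arithmetic (illustrative, not in
+    //   the original source): given a LIns* 'ins' for an Op2 instruction,
+    //   toLInsOp2 computes uintptr_t(ins+1) - sizeof(LInsOp2), ie. it steps
+    //   past the LIns word to the end of the instruction and then back up
+    //   by the full three-word LInsOp2 size, landing on oprnd_2 at the
+    //   start of the instruction.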
+ //----------------------------------------------------------------------- + + enum LInsRepKind { + // LRK_XYZ corresponds to class LInsXYZ. + LRK_Op0, + LRK_Op1, + LRK_Op2, + LRK_Op3, + LRK_Ld, + LRK_St, + LRK_Sk, + LRK_C, + LRK_P, + LRK_I, + LRK_QorD, + LRK_Jtbl, + LRK_None // this one is used for unused opcode numbers + }; + + class LInsOp0; + class LInsOp1; + class LInsOp2; + class LInsOp3; + class LInsLd; + class LInsSt; + class LInsSk; + class LInsC; + class LInsP; + class LInsI; + class LInsQorD; + class LInsJtbl; + + class LIns + { + private: + // SharedFields: fields shared by all LIns kinds. + // + // The .inReg, .regnum, .inAr and .arIndex fields form a "reservation" + // that is used temporarily during assembly to record information + // relating to register allocation. See class RegAlloc for more + // details. Note: all combinations of .inReg/.inAr are possible, ie. + // 0/0, 0/1, 1/0, 1/1. + // + // The .isResultLive field is only used for instructions that return + // results. It indicates if the result is live. It's set (if + // appropriate) and used only during the codegen pass. + // + struct SharedFields { + uint32_t inReg:1; // if 1, 'reg' is active + uint32_t regnum:7; + uint32_t inAr:1; // if 1, 'arIndex' is active + uint32_t isResultLive:1; // if 1, the instruction's result is live + + uint32_t arIndex:14; // index into stack frame; displ is -4*arIndex + + LOpcode opcode:8; // instruction's opcode + }; + + union { + SharedFields sharedFields; + // Force sizeof(LIns)==8 and 8-byte alignment on 64-bit machines. + // This is necessary because sizeof(SharedFields)==4 and we want all + // instances of LIns to be pointer-aligned. + void* wholeWord; + }; + + inline void initSharedFields(LOpcode opcode) + { + // We must zero .inReg, .inAR and .isResultLive, but zeroing the + // whole word is easier. Then we set the opcode. + wholeWord = 0; + sharedFields.opcode = opcode; + } + + // LIns-to-LInsXYZ converters. + inline LInsOp0* toLInsOp0() const; + inline LInsOp1* toLInsOp1() const; + inline LInsOp2* toLInsOp2() const; + inline LInsOp3* toLInsOp3() const; + inline LInsLd* toLInsLd() const; + inline LInsSt* toLInsSt() const; + inline LInsSk* toLInsSk() const; + inline LInsC* toLInsC() const; + inline LInsP* toLInsP() const; + inline LInsI* toLInsI() const; + inline LInsQorD* toLInsQorD() const; + inline LInsJtbl*toLInsJtbl()const; + + void staticSanityCheck(); + + public: + // LIns initializers. + inline void initLInsOp0(LOpcode opcode); + inline void initLInsOp1(LOpcode opcode, LIns* oprnd1); + inline void initLInsOp2(LOpcode opcode, LIns* oprnd1, LIns* oprnd2); + inline void initLInsOp3(LOpcode opcode, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3); + inline void initLInsLd(LOpcode opcode, LIns* val, int32_t d, AccSet accSet, LoadQual loadQual); + inline void initLInsSt(LOpcode opcode, LIns* val, LIns* base, int32_t d, AccSet accSet); + inline void initLInsSk(LIns* prevLIns); + // Nb: args[] must be allocated and initialised before being passed in; + // initLInsC() just copies the pointer into the LInsC. 
+ inline void initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci); + inline void initLInsP(int32_t arg, int32_t kind); + inline void initLInsI(LOpcode opcode, int32_t immI); + inline void initLInsQorD(LOpcode opcode, uint64_t immQorD); + inline void initLInsJtbl(LIns* index, uint32_t size, LIns** table); + + LOpcode opcode() const { return sharedFields.opcode; } + + // Generally, void instructions (statements) are always live and + // non-void instructions (expressions) are live if used by another + // live instruction. But there are some trickier cases. + // Any non-void instruction can be marked isResultLive=1 even + // when it is unreachable, e.g. due to an always-taken branch. + // The assembler marks it live if it sees any uses, regardless of + // whether those uses are in reachable code or not. + bool isLive() const { + return isV() || + sharedFields.isResultLive || + (isCall() && !callInfo()->_isPure) || // impure calls are always live + isop(LIR_paramp); // LIR_paramp is always live + } + void setResultLive() { + NanoAssert(!isV()); + sharedFields.isResultLive = 1; + } + + // XXX: old reservation manipulating functions. See bug 538924. + // Replacement strategy: + // - deprecated_markAsClear() --> clearReg() and/or clearArIndex() + // - deprecated_hasKnownReg() --> isInReg() + // - deprecated_getReg() --> getReg() after checking isInReg() + // + void deprecated_markAsClear() { + sharedFields.inReg = 0; + sharedFields.inAr = 0; + } + bool deprecated_hasKnownReg() { + NanoAssert(isExtant()); + return isInReg(); + } + Register deprecated_getReg() { + NanoAssert(isExtant()); + if (isInReg()) { + Register r = { sharedFields.regnum }; + return r; + } else { + return deprecated_UnknownReg; + } + } + uint32_t deprecated_getArIndex() { + NanoAssert(isExtant()); + return ( isInAr() ? sharedFields.arIndex : 0 ); + } + + // Reservation manipulation. + // + // "Extant" mean "in existence, still existing, surviving". In other + // words, has the value been computed explicitly (not folded into + // something else) and is it still available (in a register or spill + // slot) for use? + bool isExtant() { + return isInReg() || isInAr(); + } + bool isInReg() { + return sharedFields.inReg; + } + bool isInRegMask(RegisterMask allow) { + return isInReg() && (rmask(getReg()) & allow); + } + Register getReg() { + NanoAssert(isInReg()); + Register r = { sharedFields.regnum }; + return r; + } + void setReg(Register r) { + sharedFields.inReg = 1; + sharedFields.regnum = REGNUM(r); + } + void clearReg() { + sharedFields.inReg = 0; + } + bool isInAr() { + return sharedFields.inAr; + } + uint32_t getArIndex() { + NanoAssert(isInAr()); + return sharedFields.arIndex; + } + void setArIndex(uint32_t arIndex) { + sharedFields.inAr = 1; + sharedFields.arIndex = arIndex; + } + void clearArIndex() { + sharedFields.inAr = 0; + } + + // For various instruction kinds. + inline LIns* oprnd1() const; + inline LIns* oprnd2() const; + inline LIns* oprnd3() const; + + // For branches. + inline LIns* getTarget() const; + inline void setTarget(LIns* label); + + // For guards. + inline GuardRecord* record() const; + + // For loads. + inline LoadQual loadQual() const; + + // For loads/stores. + inline int32_t disp() const; + inline MiniAccSet miniAccSet() const; + inline AccSet accSet() const; + + // For LInsSk. + inline LIns* prevLIns() const; + + // For LInsP. + inline uint8_t paramArg() const; + inline uint8_t paramKind() const; + + // For LInsI. + inline int32_t immI() const; + + // For LInsQorD. 
+#ifdef NANOJIT_64BIT + inline int32_t immQlo() const; + inline uint64_t immQ() const; +#endif + inline int32_t immDlo() const; + inline int32_t immDhi() const; + inline double immD() const; + inline uint64_t immDasQ() const; + + // For LIR_allocp. + inline int32_t size() const; + inline void setSize(int32_t nbytes); + + // For LInsC. + inline LIns* arg(uint32_t i) const; // right-to-left-order: arg(0) is rightmost + inline uint32_t argc() const; + inline LIns* callArgN(uint32_t n) const; + inline const CallInfo* callInfo() const; + + // For LIR_jtbl + inline uint32_t getTableSize() const; + inline LIns* getTarget(uint32_t index) const; + inline void setTarget(uint32_t index, LIns* label) const; + + // isLInsXYZ() returns true if the instruction has the LInsXYZ form. + // Note that there is some overlap with other predicates, eg. + // isStore()==isLInsSt(), isCall()==isLInsC(), but that's ok; these + // ones are used mostly to check that opcodes are appropriate for + // instruction layouts, the others are used for non-debugging + // purposes. + bool isLInsOp0() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_Op0 == repKinds[opcode()]; + } + bool isLInsOp1() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_Op1 == repKinds[opcode()]; + } + bool isLInsOp2() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_Op2 == repKinds[opcode()]; + } + bool isLInsOp3() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_Op3 == repKinds[opcode()]; + } + bool isLInsLd() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_Ld == repKinds[opcode()]; + } + bool isLInsSt() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_St == repKinds[opcode()]; + } + bool isLInsSk() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_Sk == repKinds[opcode()]; + } + bool isLInsC() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_C == repKinds[opcode()]; + } + bool isLInsP() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_P == repKinds[opcode()]; + } + bool isLInsI() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_I == repKinds[opcode()]; + } + bool isLInsQorD() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_QorD == repKinds[opcode()]; + } + bool isLInsJtbl() const { + NanoAssert(LRK_None != repKinds[opcode()]); + return LRK_Jtbl == repKinds[opcode()]; + } + + // LIns predicates. + bool isop(LOpcode o) const { + return opcode() == o; + } + bool isRet() const { + return isRetOpcode(opcode()); + } + bool isCmp() const { + return isCmpOpcode(opcode()); + } + bool isCall() const { + return isop(LIR_callv) || + isop(LIR_calli) || +#if defined NANOJIT_64BIT + isop(LIR_callq) || +#endif + isop(LIR_calld); + } + bool isCmov() const { + return isCmovOpcode(opcode()); + } + bool isStore() const { + return isLInsSt(); + } + bool isLoad() const { + return isLInsLd(); + } + bool isGuard() const { + return isop(LIR_x) || isop(LIR_xf) || isop(LIR_xt) || + isop(LIR_xbarrier) || isop(LIR_xtbl) || + isop(LIR_addxovi) || isop(LIR_subxovi) || isop(LIR_mulxovi); + } + bool isJov() const { + return +#ifdef NANOJIT_64BIT + isop(LIR_addjovq) || isop(LIR_subjovq) || +#endif + isop(LIR_addjovi) || isop(LIR_subjovi) || isop(LIR_muljovi); + } + // True if the instruction is a 32-bit integer immediate. 
+        bool isImmI() const {
+            return isop(LIR_immi);
+        }
+        // True if the instruction is a 32-bit integer immediate and
+        // has the value 'val' when treated as a 32-bit signed integer.
+        bool isImmI(int32_t val) const {
+            return isImmI() && immI()==val;
+        }
+#ifdef NANOJIT_64BIT
+        // True if the instruction is a 64-bit integer immediate.
+        bool isImmQ() const {
+            return isop(LIR_immq);
+        }
+#endif
+        // True if the instruction is a pointer-sized integer immediate.
+        bool isImmP() const
+        {
+#ifdef NANOJIT_64BIT
+            return isImmQ();
+#else
+            return isImmI();
+#endif
+        }
+        // True if the instruction is a 64-bit float immediate.
+        bool isImmD() const {
+            return isop(LIR_immd);
+        }
+        // True if the instruction is a 64-bit integer or float immediate.
+        bool isImmQorD() const {
+            return
+#ifdef NANOJIT_64BIT
+                isImmQ() ||
+#endif
+                isImmD();
+        }
+        // True if the instruction is any type of immediate.
+        bool isImmAny() const {
+            return isImmI() || isImmQorD();
+        }
+
+        bool isBranch() const {
+            return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl) || isJov();
+        }
+
+        LTy retType() const {
+            return retTypes[opcode()];
+        }
+        bool isV() const {
+            return retType() == LTy_V;
+        }
+        bool isI() const {
+            return retType() == LTy_I;
+        }
+#ifdef NANOJIT_64BIT
+        bool isQ() const {
+            return retType() == LTy_Q;
+        }
+#endif
+        bool isD() const {
+            return retType() == LTy_D;
+        }
+        bool isQorD() const {
+            return
+#ifdef NANOJIT_64BIT
+                isQ() ||
+#endif
+                isD();
+        }
+        bool isP() const {
+#ifdef NANOJIT_64BIT
+            return isQ();
+#else
+            return isI();
+#endif
+        }
+
+        inline void* immP() const
+        {
+#ifdef NANOJIT_64BIT
+            return (void*)immQ();
+#else
+            return (void*)immI();
+#endif
+        }
+    };
+
+    typedef SeqBuilder<LIns*> InsList;
+    typedef SeqBuilder<char*> StringList;
+
+
+    // 0-operand form.  Used for LIR_start and LIR_label.
+    class LInsOp0
+    {
+    private:
+        friend class LIns;
+
+        LIns        ins;
+
+    public:
+        LIns* getLIns() { return &ins; };
+    };
+
+    // 1-operand form.  Used for LIR_reti, unary arithmetic/logic ops, etc.
+    class LInsOp1
+    {
+    private:
+        friend class LIns;
+
+        LIns*       oprnd_1;
+
+        LIns        ins;
+
+    public:
+        LIns* getLIns() { return &ins; };
+    };
+
+    // 2-operand form.  Used for guards, branches, comparisons, binary
+    // arithmetic/logic ops, etc.
+    class LInsOp2
+    {
+    private:
+        friend class LIns;
+
+        LIns*       oprnd_2;
+
+        LIns*       oprnd_1;
+
+        LIns        ins;
+
+    public:
+        LIns* getLIns() { return &ins; };
+    };
+
+    // 3-operand form.  Used for conditional moves, jov branches, and xov guards.
+    class LInsOp3
+    {
+    private:
+        friend class LIns;
+
+        LIns*       oprnd_3;
+
+        LIns*       oprnd_2;
+
+        LIns*       oprnd_1;
+
+        LIns        ins;
+
+    public:
+        LIns* getLIns() { return &ins; };
+    };
+
+    // Used for all loads.
+    class LInsLd
+    {
+    private:
+        friend class LIns;
+
+        // Nb: the LIR writer pipeline handles things if a displacement
+        // exceeds 16 bits.  This is rare, but does happen occasionally.  We
+        // could go to 24 bits but then it would happen so rarely that the
+        // handler code would be difficult to test and thus untrustworthy.
+        //
+        // Nb: the types of these bitfields are all 32-bit integers to ensure
+        // they are fully packed on Windows, sigh.  Also, 'loadQual' is
+        // unsigned to ensure the values 0, 1, and 2 all fit in 2 bits.
+        //
+        // Nb: the explicit 'signed' keyword for bitfield types is required;
+        // some compilers may treat them as unsigned without it.
+ // See Bugzilla 584219 comment #18 + signed int disp:16; + signed int miniAccSetVal:8; + uint32_t loadQual:2; + + LIns* oprnd_1; + + LIns ins; + + public: + LIns* getLIns() { return &ins; }; + }; + + // Used for all stores. + class LInsSt + { + private: + friend class LIns; + + int16_t disp; + MiniAccSetVal miniAccSetVal; + + LIns* oprnd_2; + + LIns* oprnd_1; + + LIns ins; + + public: + LIns* getLIns() { return &ins; }; + }; + + // Used for LIR_skip. + class LInsSk + { + private: + friend class LIns; + + LIns* prevLIns; + + LIns ins; + + public: + LIns* getLIns() { return &ins; }; + }; + + // Used for all variants of LIR_call. + class LInsC + { + private: + friend class LIns; + + // Arguments in reverse order, just like insCall() (ie. args[0] holds + // the rightmost arg). The array should be allocated by the same + // allocator as the LIR buffers, because it has the same lifetime. + LIns** args; + + const CallInfo* ci; + + LIns ins; + + public: + LIns* getLIns() { return &ins; }; + }; + + // Used for LIR_paramp. + class LInsP + { + private: + friend class LIns; + + uintptr_t arg:8; + uintptr_t kind:8; + + LIns ins; + + public: + LIns* getLIns() { return &ins; }; + }; + + // Used for LIR_immi and LIR_allocp. + class LInsI + { + private: + friend class LIns; + + int32_t immI; + + LIns ins; + + public: + LIns* getLIns() { return &ins; }; + }; + + // Used for LIR_immq and LIR_immd. + class LInsQorD + { + private: + friend class LIns; + + int32_t immQorDlo; + + int32_t immQorDhi; + + LIns ins; + + public: + LIns* getLIns() { return &ins; }; + }; + + // Used for LIR_jtbl. 'oprnd_1' must be a uint32_t index in + // the range 0 <= index < size; no range check is performed. + // 'table' is an array of labels. + class LInsJtbl + { + private: + friend class LIns; + + uint32_t size; // number of entries in table + LIns** table; // pointer to table[size] with same lifetime as this LInsJtbl + LIns* oprnd_1; // uint32_t index expression + + LIns ins; + + public: + LIns* getLIns() { return &ins; } + }; + + // Used only as a placeholder for OP___ macros for unused opcodes in + // LIRopcode.tbl. 
+ class LInsNone + { + }; + + LInsOp0* LIns::toLInsOp0() const { return (LInsOp0* )(uintptr_t(this+1) - sizeof(LInsOp0 )); } + LInsOp1* LIns::toLInsOp1() const { return (LInsOp1* )(uintptr_t(this+1) - sizeof(LInsOp1 )); } + LInsOp2* LIns::toLInsOp2() const { return (LInsOp2* )(uintptr_t(this+1) - sizeof(LInsOp2 )); } + LInsOp3* LIns::toLInsOp3() const { return (LInsOp3* )(uintptr_t(this+1) - sizeof(LInsOp3 )); } + LInsLd* LIns::toLInsLd() const { return (LInsLd* )(uintptr_t(this+1) - sizeof(LInsLd )); } + LInsSt* LIns::toLInsSt() const { return (LInsSt* )(uintptr_t(this+1) - sizeof(LInsSt )); } + LInsSk* LIns::toLInsSk() const { return (LInsSk* )(uintptr_t(this+1) - sizeof(LInsSk )); } + LInsC* LIns::toLInsC() const { return (LInsC* )(uintptr_t(this+1) - sizeof(LInsC )); } + LInsP* LIns::toLInsP() const { return (LInsP* )(uintptr_t(this+1) - sizeof(LInsP )); } + LInsI* LIns::toLInsI() const { return (LInsI* )(uintptr_t(this+1) - sizeof(LInsI )); } + LInsQorD* LIns::toLInsQorD() const { return (LInsQorD*)(uintptr_t(this+1) - sizeof(LInsQorD)); } + LInsJtbl* LIns::toLInsJtbl() const { return (LInsJtbl*)(uintptr_t(this+1) - sizeof(LInsJtbl)); } + + void LIns::initLInsOp0(LOpcode opcode) { + initSharedFields(opcode); + NanoAssert(isLInsOp0()); + } + void LIns::initLInsOp1(LOpcode opcode, LIns* oprnd1) { + initSharedFields(opcode); + toLInsOp1()->oprnd_1 = oprnd1; + NanoAssert(isLInsOp1()); + } + void LIns::initLInsOp2(LOpcode opcode, LIns* oprnd1, LIns* oprnd2) { + initSharedFields(opcode); + toLInsOp2()->oprnd_1 = oprnd1; + toLInsOp2()->oprnd_2 = oprnd2; + NanoAssert(isLInsOp2()); + } + void LIns::initLInsOp3(LOpcode opcode, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3) { + initSharedFields(opcode); + toLInsOp3()->oprnd_1 = oprnd1; + toLInsOp3()->oprnd_2 = oprnd2; + toLInsOp3()->oprnd_3 = oprnd3; + NanoAssert(isLInsOp3()); + } + void LIns::initLInsLd(LOpcode opcode, LIns* val, int32_t d, AccSet accSet, LoadQual loadQual) { + initSharedFields(opcode); + toLInsLd()->oprnd_1 = val; + NanoAssert(d == int16_t(d)); + toLInsLd()->disp = int16_t(d); + toLInsLd()->miniAccSetVal = compressAccSet(accSet).val; + toLInsLd()->loadQual = loadQual; + NanoAssert(isLInsLd()); + } + void LIns::initLInsSt(LOpcode opcode, LIns* val, LIns* base, int32_t d, AccSet accSet) { + initSharedFields(opcode); + toLInsSt()->oprnd_1 = val; + toLInsSt()->oprnd_2 = base; + NanoAssert(d == int16_t(d)); + toLInsSt()->disp = int16_t(d); + toLInsSt()->miniAccSetVal = compressAccSet(accSet).val; + NanoAssert(isLInsSt()); + } + void LIns::initLInsSk(LIns* prevLIns) { + initSharedFields(LIR_skip); + toLInsSk()->prevLIns = prevLIns; + NanoAssert(isLInsSk()); + } + void LIns::initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci) { + initSharedFields(opcode); + toLInsC()->args = args; + toLInsC()->ci = ci; + NanoAssert(isLInsC()); + } + void LIns::initLInsP(int32_t arg, int32_t kind) { + initSharedFields(LIR_paramp); + NanoAssert(isU8(arg) && isU8(kind)); + toLInsP()->arg = arg; + toLInsP()->kind = kind; + NanoAssert(isLInsP()); + } + void LIns::initLInsI(LOpcode opcode, int32_t immI) { + initSharedFields(opcode); + toLInsI()->immI = immI; + NanoAssert(isLInsI()); + } + void LIns::initLInsQorD(LOpcode opcode, uint64_t immQorD) { + initSharedFields(opcode); + toLInsQorD()->immQorDlo = int32_t(immQorD); + toLInsQorD()->immQorDhi = int32_t(immQorD >> 32); + NanoAssert(isLInsQorD()); + } + void LIns::initLInsJtbl(LIns* index, uint32_t size, LIns** table) { + initSharedFields(LIR_jtbl); + toLInsJtbl()->oprnd_1 = index; + toLInsJtbl()->table 
= table; + toLInsJtbl()->size = size; + NanoAssert(isLInsJtbl()); + } + + LIns* LIns::oprnd1() const { + NanoAssert(isLInsOp1() || isLInsOp2() || isLInsOp3() || isLInsLd() || isLInsSt() || isLInsJtbl()); + return toLInsOp2()->oprnd_1; + } + LIns* LIns::oprnd2() const { + NanoAssert(isLInsOp2() || isLInsOp3() || isLInsSt()); + return toLInsOp2()->oprnd_2; + } + LIns* LIns::oprnd3() const { + NanoAssert(isLInsOp3()); + return toLInsOp3()->oprnd_3; + } + + LIns* LIns::getTarget() const { + NanoAssert(isBranch() && !isop(LIR_jtbl)); + if (isJov()) + return oprnd3(); + else + return oprnd2(); + } + + void LIns::setTarget(LIns* label) { + NanoAssert(label && label->isop(LIR_label)); + NanoAssert(isBranch() && !isop(LIR_jtbl)); + if (isJov()) + toLInsOp3()->oprnd_3 = label; + else + toLInsOp2()->oprnd_2 = label; + } + + LIns* LIns::getTarget(uint32_t index) const { + NanoAssert(isop(LIR_jtbl)); + NanoAssert(index < toLInsJtbl()->size); + return toLInsJtbl()->table[index]; + } + + void LIns::setTarget(uint32_t index, LIns* label) const { + NanoAssert(label && label->isop(LIR_label)); + NanoAssert(isop(LIR_jtbl)); + NanoAssert(index < toLInsJtbl()->size); + toLInsJtbl()->table[index] = label; + } + + GuardRecord *LIns::record() const { + NanoAssert(isGuard()); + switch (opcode()) { + case LIR_x: + case LIR_xt: + case LIR_xf: + case LIR_xtbl: + case LIR_xbarrier: + return (GuardRecord*)oprnd2(); + + case LIR_addxovi: + case LIR_subxovi: + case LIR_mulxovi: + return (GuardRecord*)oprnd3(); + + default: + NanoAssert(0); + return NULL; + } + } + + LoadQual LIns::loadQual() const { + NanoAssert(isLInsLd()); + return (LoadQual)toLInsLd()->loadQual; + } + + int32_t LIns::disp() const { + if (isLInsSt()) { + return toLInsSt()->disp; + } else { + NanoAssert(isLInsLd()); + return toLInsLd()->disp; + } + } + + MiniAccSet LIns::miniAccSet() const { + MiniAccSet miniAccSet; + if (isLInsSt()) { + miniAccSet.val = toLInsSt()->miniAccSetVal; + } else { + NanoAssert(isLInsLd()); + miniAccSet.val = toLInsLd()->miniAccSetVal; + } + return miniAccSet; + } + + AccSet LIns::accSet() const { + return decompressMiniAccSet(miniAccSet()); + } + + LIns* LIns::prevLIns() const { + NanoAssert(isLInsSk()); + return toLInsSk()->prevLIns; + } + + inline uint8_t LIns::paramArg() const { NanoAssert(isop(LIR_paramp)); return toLInsP()->arg; } + inline uint8_t LIns::paramKind() const { NanoAssert(isop(LIR_paramp)); return toLInsP()->kind; } + + inline int32_t LIns::immI() const { NanoAssert(isImmI()); return toLInsI()->immI; } + +#ifdef NANOJIT_64BIT + inline int32_t LIns::immQlo() const { NanoAssert(isImmQ()); return toLInsQorD()->immQorDlo; } + uint64_t LIns::immQ() const { + NanoAssert(isImmQ()); + return (uint64_t(toLInsQorD()->immQorDhi) << 32) | uint32_t(toLInsQorD()->immQorDlo); + } +#endif + inline int32_t LIns::immDlo() const { NanoAssert(isImmD()); return toLInsQorD()->immQorDlo; } + inline int32_t LIns::immDhi() const { NanoAssert(isImmD()); return toLInsQorD()->immQorDhi; } + double LIns::immD() const { + NanoAssert(isImmD()); + union { + double f; + uint64_t q; + } u; + u.q = immDasQ(); + return u.f; + } + uint64_t LIns::immDasQ() const { + NanoAssert(isImmD()); + return (uint64_t(toLInsQorD()->immQorDhi) << 32) | uint32_t(toLInsQorD()->immQorDlo); + } + + int32_t LIns::size() const { + NanoAssert(isop(LIR_allocp)); + return toLInsI()->immI << 2; + } + + void LIns::setSize(int32_t nbytes) { + NanoAssert(isop(LIR_allocp)); + NanoAssert(nbytes > 0); + toLInsI()->immI = (nbytes+3)>>2; // # of required 32bit words + } + + // 
Index args in reverse order, i.e. arg(0) returns the rightmost arg. + // Nb: this must be kept in sync with insCall(). + LIns* LIns::arg(uint32_t i) const + { + NanoAssert(isCall()); + NanoAssert(i < callInfo()->count_args()); + return toLInsC()->args[i]; // args[] is in right-to-left order as well + } + + uint32_t LIns::argc() const { + return callInfo()->count_args(); + } + + LIns* LIns::callArgN(uint32_t n) const + { + return arg(argc()-n-1); + } + + const CallInfo* LIns::callInfo() const + { + NanoAssert(isCall()); + return toLInsC()->ci; + } + + uint32_t LIns::getTableSize() const + { + NanoAssert(isLInsJtbl()); + return toLInsJtbl()->size; + } + + class LirWriter + { + public: + LirWriter *out; + + LirWriter(LirWriter* out) + : out(out) {} + virtual ~LirWriter() {} + + virtual LIns* ins0(LOpcode v) { + return out->ins0(v); + } + virtual LIns* ins1(LOpcode v, LIns* a) { + return out->ins1(v, a); + } + virtual LIns* ins2(LOpcode v, LIns* a, LIns* b) { + return out->ins2(v, a, b); + } + virtual LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c) { + return out->ins3(v, a, b, c); + } + virtual LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr) { + return out->insGuard(v, c, gr); + } + virtual LIns* insGuardXov(LOpcode v, LIns *a, LIns* b, GuardRecord *gr) { + return out->insGuardXov(v, a, b, gr); + } + virtual LIns* insBranch(LOpcode v, LIns* condition, LIns* to) { + return out->insBranch(v, condition, to); + } + virtual LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) { + return out->insBranchJov(v, a, b, to); + } + // arg: 0=first, 1=second, ... + // kind: 0=arg 1=saved-reg + virtual LIns* insParam(int32_t arg, int32_t kind) { + return out->insParam(arg, kind); + } + virtual LIns* insImmI(int32_t imm) { + return out->insImmI(imm); + } +#ifdef NANOJIT_64BIT + virtual LIns* insImmQ(uint64_t imm) { + return out->insImmQ(imm); + } +#endif + virtual LIns* insImmD(double d) { + return out->insImmD(d); + } + virtual LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual) { + return out->insLoad(op, base, d, accSet, loadQual); + } + virtual LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet) { + return out->insStore(op, value, base, d, accSet); + } + // args[] is in reverse order, ie. args[0] holds the rightmost arg. + virtual LIns* insCall(const CallInfo *call, LIns* args[]) { + return out->insCall(call, args); + } + virtual LIns* insAlloc(int32_t size) { + NanoAssert(size != 0); + return out->insAlloc(size); + } + virtual LIns* insJtbl(LIns* index, uint32_t size) { + return out->insJtbl(index, size); + } + virtual LIns* insComment(const char* str) { + return out->insComment(str); + } + + // convenience functions + + // Inserts a conditional to execute and branches to execute if + // the condition is true and false respectively. + LIns* insChoose(LIns* cond, LIns* iftrue, LIns* iffalse, bool use_cmov); + + // Inserts an integer comparison to 0 + LIns* insEqI_0(LIns* oprnd1) { + return ins2ImmI(LIR_eqi, oprnd1, 0); + } + + // Inserts a pointer comparison to 0 + LIns* insEqP_0(LIns* oprnd1) { + return ins2(LIR_eqp, oprnd1, insImmWord(0)); + } + + // Inserts a binary operation where the second operand is an + // integer immediate. 
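+        // (A hypothetical example, not from the original source: a bounds
+        // check might be built as ins2ImmI(LIR_lti, lenIns, 16), which is
+        // shorthand for ins2(LIR_lti, lenIns, insImmI(16)); 'lenIns' is an
+        // assumed length operand.)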
+ LIns* ins2ImmI(LOpcode v, LIns* oprnd1, int32_t imm) { + return ins2(v, oprnd1, insImmI(imm)); + } + + LIns* insImmP(const void *ptr) { +#ifdef NANOJIT_64BIT + return insImmQ((uint64_t)ptr); +#else + return insImmI((int32_t)ptr); +#endif + } + + LIns* insImmWord(intptr_t value) { +#ifdef NANOJIT_64BIT + return insImmQ(value); +#else + return insImmI(value); +#endif + } + + // Sign-extend integers to native integers. On 32-bit this is a no-op. + LIns* insI2P(LIns* intIns) { +#ifdef NANOJIT_64BIT + return ins1(LIR_i2q, intIns); +#else + return intIns; +#endif + } + + // Zero-extend integers to native integers. On 32-bit this is a no-op. + LIns* insUI2P(LIns* uintIns) { + #ifdef NANOJIT_64BIT + return ins1(LIR_ui2uq, uintIns); + #else + return uintIns; + #endif + } + + // Do a load with LoadQual==LOAD_NORMAL. + LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet) { + return insLoad(op, base, d, accSet, LOAD_NORMAL); + } + + // Chooses LIR_sti, LIR_stq or LIR_std according to the type of 'value'. + LIns* insStore(LIns* value, LIns* base, int32_t d, AccSet accSet); + }; + + +#ifdef NJ_VERBOSE + extern const char* lirNames[]; + + // Maps address ranges to meaningful names. + class AddrNameMap + { + Allocator& allocator; + class Entry + { + public: + Entry(int) : name(0), size(0), align(0) {} + Entry(char *n, size_t s, size_t a) : name(n), size(s), align(a) {} + char* name; + size_t size:29, align:3; + }; + TreeMap<const void*, Entry*> names; // maps code regions to names + public: + AddrNameMap(Allocator& allocator); + void addAddrRange(const void *p, size_t size, size_t align, const char *name); + void lookupAddr(void *p, char*& name, int32_t& offset); + }; + + // Maps LIR instructions to meaningful names. + class LirNameMap + { + private: + Allocator& alloc; + + // A small string-wrapper class, required because we need '==' to + // compare string contents, not string pointers, when strings are used + // as keys in CountMap. + struct Str { + Allocator& alloc; + char* s; + + Str(Allocator& alloc_, const char* s_) : alloc(alloc_) { + s = new (alloc) char[1+strlen(s_)]; + strcpy(s, s_); + } + + bool operator==(const Str& str) const { + return (0 == strcmp(this->s, str.s)); + } + }; + + // Similar to 'struct Str' -- we need to hash the string's contents, + // not its pointer. 
+ template<class K> struct StrHash { + static size_t hash(const Str &k) { + // (const void*) cast is required by ARM RVCT 2.2 + return murmurhash((const void*)k.s, strlen(k.s)); + } + }; + + template <class Key, class H=DefaultHash<Key> > + class CountMap: public HashMap<Key, int, H> { + public: + CountMap(Allocator& alloc) : HashMap<Key, int, H>(alloc, 128) {} + int add(Key k) { + int c = 1; + if (this->containsKey(k)) { + c = 1+this->get(k); + } + this->put(k,c); + return c; + } + }; + + CountMap<int> lircounts; + CountMap<const CallInfo *> funccounts; + CountMap<Str, StrHash<Str> > namecounts; + + void addNameWithSuffix(LIns* i, const char *s, int suffix, bool ignoreOneSuffix); + + class Entry + { + public: + Entry(int) : name(0) {} + Entry(char* n) : name(n) {} + char* name; + }; + + HashMap<LIns*, Entry*> names; + + public: + LirNameMap(Allocator& alloc) + : alloc(alloc), + lircounts(alloc), + funccounts(alloc), + namecounts(alloc), + names(alloc) + {} + + void addName(LIns* ins, const char *s); // gives 'ins' a special name + const char* createName(LIns* ins); // gives 'ins' a generic name + const char* lookupName(LIns* ins); + }; + + // We use big buffers for cases where we need to fit a whole instruction, + // and smaller buffers for all the others. These should easily be long + // enough, but for safety the formatXyz() functions check and won't exceed + // those limits. + class InsBuf { + public: + static const size_t len = 1000; + char buf[len]; + }; + class RefBuf { + public: + static const size_t len = 200; + char buf[len]; + }; + + class LInsPrinter + { + private: + Allocator& alloc; + const int EMB_NUM_USED_ACCS; + + char *formatImmI(RefBuf* buf, int32_t c); +#ifdef NANOJIT_64BIT + char *formatImmQ(RefBuf* buf, uint64_t c); +#endif + char *formatImmD(RefBuf* buf, double c); + void formatGuard(InsBuf* buf, LIns* ins); // defined by the embedder + void formatGuardXov(InsBuf* buf, LIns* ins); // defined by the embedder + + public: + static const char* accNames[]; // defined by the embedder + + LInsPrinter(Allocator& alloc, int embNumUsedAccs) + : alloc(alloc), EMB_NUM_USED_ACCS(embNumUsedAccs) + { + addrNameMap = new (alloc) AddrNameMap(alloc); + lirNameMap = new (alloc) LirNameMap(alloc); + } + + char *formatAddr(RefBuf* buf, void* p); + char *formatRef(RefBuf* buf, LIns* ref, bool showImmValue = true); + char *formatIns(InsBuf* buf, LIns* ins); + char *formatAccSet(RefBuf* buf, AccSet accSet); + + AddrNameMap* addrNameMap; + LirNameMap* lirNameMap; + }; + + + class VerboseWriter : public LirWriter + { + InsList code; + LInsPrinter* printer; + LogControl* logc; + const char* const prefix; + bool const always_flush; + public: + VerboseWriter(Allocator& alloc, LirWriter *out, LInsPrinter* printer, LogControl* logc, + const char* prefix = "", bool always_flush = false) + : LirWriter(out), code(alloc), printer(printer), logc(logc), prefix(prefix), always_flush(always_flush) + {} + + LIns* add(LIns* i) { + if (i) { + code.add(i); + if (always_flush) + flush(); + } + return i; + } + + LIns* add_flush(LIns* i) { + if ((i = add(i)) != 0) + flush(); + return i; + } + + void flush() + { + if (!code.isEmpty()) { + InsBuf b; + for (Seq<LIns*>* p = code.get(); p != NULL; p = p->tail) + logc->printf("%s %s\n", prefix, printer->formatIns(&b, p->head)); + code.clear(); + } + } + + LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr) { + return add_flush(out->insGuard(op,cond,gr)); + } + + LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr) { + return 
add(out->insGuardXov(op,a,b,gr)); + } + + LIns* insBranch(LOpcode v, LIns* condition, LIns* to) { + return add_flush(out->insBranch(v, condition, to)); + } + + LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) { + return add(out->insBranchJov(v, a, b, to)); + } + + LIns* insJtbl(LIns* index, uint32_t size) { + return add_flush(out->insJtbl(index, size)); + } + + LIns* ins0(LOpcode v) { + if (v == LIR_label || v == LIR_start) { + flush(); + } + return add(out->ins0(v)); + } + + LIns* ins1(LOpcode v, LIns* a) { + return isRetOpcode(v) ? add_flush(out->ins1(v, a)) : add(out->ins1(v, a)); + } + LIns* ins2(LOpcode v, LIns* a, LIns* b) { + return add(out->ins2(v, a, b)); + } + LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c) { + return add(out->ins3(v, a, b, c)); + } + LIns* insCall(const CallInfo *call, LIns* args[]) { + return add_flush(out->insCall(call, args)); + } + LIns* insParam(int32_t i, int32_t kind) { + return add(out->insParam(i, kind)); + } + LIns* insLoad(LOpcode v, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual) { + return add(out->insLoad(v, base, disp, accSet, loadQual)); + } + LIns* insStore(LOpcode op, LIns* v, LIns* b, int32_t d, AccSet accSet) { + return add_flush(out->insStore(op, v, b, d, accSet)); + } + LIns* insAlloc(int32_t size) { + return add(out->insAlloc(size)); + } + LIns* insImmI(int32_t imm) { + return add(out->insImmI(imm)); + } +#ifdef NANOJIT_64BIT + LIns* insImmQ(uint64_t imm) { + return add(out->insImmQ(imm)); + } +#endif + LIns* insImmD(double d) { + return add(out->insImmD(d)); + } + + LIns* insComment(const char* str) { + return add_flush(out->insComment(str)); + } + }; + +#endif + + class ExprFilter: public LirWriter + { + public: + ExprFilter(LirWriter *out) : LirWriter(out) {} + LIns* ins1(LOpcode v, LIns* a); + LIns* ins2(LOpcode v, LIns* a, LIns* b); + LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c); + LIns* insGuard(LOpcode, LIns* cond, GuardRecord *); + LIns* insGuardXov(LOpcode, LIns* a, LIns* b, GuardRecord *); + LIns* insBranch(LOpcode, LIns* cond, LIns* target); + LIns* insBranchJov(LOpcode, LIns* a, LIns* b, LIns* target); + LIns* insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet, LoadQual loadQual); + private: + LIns* simplifyOverflowArith(LOpcode op, LIns** opnd1, LIns** opnd2); + }; + + class CseFilter: public LirWriter + { + enum NLKind { + // We divide instruction kinds into groups. LIns0 isn't present + // because we don't need to record any 0-ary instructions. Loads + // aren't here, they're handled separately. + NLImmISmall = 0, + NLImmILarge = 1, + NLImmQ = 2, // only occurs on 64-bit platforms + NLImmD = 3, + NL1 = 4, + NL2 = 5, + NL3 = 6, + NLCall = 7, + + NLFirst = 0, + NLLast = 7, + // Need a value after "last" to outsmart compilers that insist last+1 is impossible. + NLInvalid = 8 + }; + #define nextNLKind(kind) NLKind(kind+1) + + // There is one table for each NLKind. This lets us size the lists + // appropriately (some instruction kinds are more common than others). + // It also lets us have NLKind-specific find/add/grow functions, which + // are faster than generic versions. + // + // Nb: m_listNL and m_capNL sizes must be a power of 2. + // Don't start m_capNL too small, or we'll waste time growing and rehashing. + // Don't start m_capNL too large, will waste memory. 
+
+    class CseFilter: public LirWriter
+    {
+        enum NLKind {
+            // We divide instruction kinds into groups. LIns0 isn't present
+            // because we don't need to record any 0-ary instructions. Loads
+            // aren't here, they're handled separately.
+            NLImmISmall = 0,
+            NLImmILarge = 1,
+            NLImmQ      = 2,    // only occurs on 64-bit platforms
+            NLImmD      = 3,
+            NL1         = 4,
+            NL2         = 5,
+            NL3         = 6,
+            NLCall      = 7,
+
+            NLFirst = 0,
+            NLLast  = 7,
+            // Need a value after "last" to outsmart compilers that insist last+1 is impossible.
+            NLInvalid = 8
+        };
+        #define nextNLKind(kind)  NLKind(kind+1)
+
+        // There is one table for each NLKind. This lets us size the lists
+        // appropriately (some instruction kinds are more common than others).
+        // It also lets us have NLKind-specific find/add/grow functions, which
+        // are faster than generic versions.
+        //
+        // Nb: m_listNL and m_capNL sizes must be a power of 2.
+        //     Don't start m_capNL too small, or we'll waste time growing and rehashing.
+        //     Don't start m_capNL too large, or we'll waste memory.
+        //
+        LIns**      m_listNL[NLLast + 1];
+        uint32_t    m_capNL[ NLLast + 1];
+        uint32_t    m_usedNL[NLLast + 1];
+        typedef uint32_t (CseFilter::*find_t)(LIns*);
+        find_t      m_findNL[NLLast + 1];
+
+        // Similarly, for loads, there is one table for each CseAcc. A CseAcc
+        // is like a normal access region, but there are two extra possible
+        // values: CSE_ACC_CONST, which is where we put all CONST-qualified
+        // loads, and CSE_ACC_MULTIPLE, where we put all multi-region loads.
+        // All remaining loads are single-region and go in the table entry for
+        // their region.
+        //
+        // This arrangement makes the removal of invalidated loads fast -- we
+        // can invalidate all loads from a single region by clearing that
+        // region's table.
+        //
+        typedef uint8_t CseAcc;     // same type as MiniAccSet
+
+        static const uint8_t CSE_NUM_ACCS = NUM_ACCS + 2;
+
+        // These values would be 'static const' except they are defined in
+        // terms of EMB_NUM_USED_ACCS which is itself not 'static const'
+        // because it's passed in by the embedding.
+        const uint8_t EMB_NUM_USED_ACCS;    // number of access regions used by the embedding
+        const uint8_t CSE_NUM_USED_ACCS;    // EMB_NUM_USED_ACCS + 2
+        const CseAcc  CSE_ACC_CONST;        // EMB_NUM_USED_ACCS + 0
+        const CseAcc  CSE_ACC_MULTIPLE;     // EMB_NUM_USED_ACCS + 1
+
+        // We will only use CSE_NUM_USED_ACCS of these entries, ie. the
+        // number of lists allocated depends on the number of access regions
+        // in use by the embedding.
+        LIns**      m_listL[CSE_NUM_ACCS];
+        uint32_t    m_capL[ CSE_NUM_ACCS];
+        uint32_t    m_usedL[CSE_NUM_ACCS];
+
+        AccSet      storesSinceLastLoad;    // regions stored to since the last load
+
+        Allocator& alloc;
+
+        // After a conditional guard such as "xf cmp", we know that 'cmp' must
+        // be true, else we would have side-exited. So if we see 'cmp' again
+        // we can treat it like a constant. This table records such
+        // comparisons.
+        HashMap <LIns*, bool> knownCmpValues;
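+
+        // For example (sketch):
+        //     c = eqi a, b
+        //     xf c            ; side-exit if 'c' is false
+        // On the fall-through path, any later "eqi a, b" can be replaced by
+        // the immediate 1, because the guard has proved it true.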
+
+        // If true, we will not add new instructions to the CSE tables, but we
+        // will continue to CSE instructions that match existing table
+        // entries. Load instructions will still be removed if aliasing
+        // stores are encountered.
+        bool suspended;
+
+        CseAcc miniAccSetToCseAcc(MiniAccSet miniAccSet, LoadQual loadQual) {
+            NanoAssert(miniAccSet.val < NUM_ACCS || miniAccSet.val == MINI_ACCSET_MULTIPLE.val);
+            return (loadQual == LOAD_CONST) ? CSE_ACC_CONST :
+                   (miniAccSet.val == MINI_ACCSET_MULTIPLE.val) ? CSE_ACC_MULTIPLE :
+                   miniAccSet.val;
+        }
+
+        static uint32_t hash8(uint32_t hash, const uint8_t data);
+        static uint32_t hash32(uint32_t hash, const uint32_t data);
+        static uint32_t hashptr(uint32_t hash, const void* data);
+        static uint32_t hashfinish(uint32_t hash);
+
+        static uint32_t hashImmI(int32_t);
+        static uint32_t hashImmQorD(uint64_t);      // not NANOJIT_64BIT-only -- used by findImmD()
+        static uint32_t hash1(LOpcode op, LIns*);
+        static uint32_t hash2(LOpcode op, LIns*, LIns*);
+        static uint32_t hash3(LOpcode op, LIns*, LIns*, LIns*);
+        static uint32_t hashLoad(LOpcode op, LIns*, int32_t);
+        static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]);
+
+        // These versions are used before an LIns has been created.
+        LIns* findImmISmall(int32_t a, uint32_t &k);
+        LIns* findImmILarge(int32_t a, uint32_t &k);
+#ifdef NANOJIT_64BIT
+        LIns* findImmQ(uint64_t a, uint32_t &k);
+#endif
+        LIns* findImmD(uint64_t d, uint32_t &k);
+        LIns* find1(LOpcode v, LIns* a, uint32_t &k);
+        LIns* find2(LOpcode v, LIns* a, LIns* b, uint32_t &k);
+        LIns* find3(LOpcode v, LIns* a, LIns* b, LIns* c, uint32_t &k);
+        LIns* findLoad(LOpcode v, LIns* a, int32_t b, MiniAccSet miniAccSet, LoadQual loadQual,
+                       uint32_t &k);
+        LIns* findCall(const CallInfo *call, uint32_t argc, LIns* args[], uint32_t &k);
+
+        // These versions are used after an LIns has been created; they are
+        // used for rehashing after growing. They just call onto the
+        // multi-arg versions above.
+        uint32_t findImmISmall(LIns* ins);
+        uint32_t findImmILarge(LIns* ins);
+#ifdef NANOJIT_64BIT
+        uint32_t findImmQ(LIns* ins);
+#endif
+        uint32_t findImmD(LIns* ins);
+        uint32_t find1(LIns* ins);
+        uint32_t find2(LIns* ins);
+        uint32_t find3(LIns* ins);
+        uint32_t findCall(LIns* ins);
+        uint32_t findLoad(LIns* ins);
+
+        // These return false if they failed to grow due to OOM.
+        bool growNL(NLKind kind);
+        bool growL(CseAcc cseAcc);
+
+        // 'k' is the index found by findXYZ().
+        void addNLImmISmall(LIns* ins, uint32_t k);
+        void addNL(NLKind kind, LIns* ins, uint32_t k);
+        void addL(LIns* ins, uint32_t k);
+
+        void clearAll();            // clears all tables
+        void clearNL(NLKind);       // clears one non-load table
+        void clearL(CseAcc);        // clears one load table
+
+    public:
+        CseFilter(LirWriter *out, uint8_t embNumUsedAccs, Allocator&);
+
+        // CseFilter does some largish fallible allocations at start-up. If
+        // they fail, the constructor sets this field to 'true'. It should be
+        // checked after creation, and if set the CseFilter cannot be used.
+        // (But the check can be skipped if allocChunk() always succeeds.)
+        //
+        // FIXME: This fallibility is a sop to TraceMonkey's implementation of
+        // infallible malloc -- by avoiding some largish infallible
+        // allocations, it reduces the size of the reserve space needed.
+        // Bug 624590 is open to fix this.
+        bool initOOM;
+
+        LIns* insImmI(int32_t imm);
+#ifdef NANOJIT_64BIT
+        LIns* insImmQ(uint64_t q);
+#endif
+        LIns* insImmD(double d);
+        LIns* ins0(LOpcode v);
+        LIns* ins1(LOpcode v, LIns*);
+        LIns* ins2(LOpcode v, LIns*, LIns*);
+        LIns* ins3(LOpcode v, LIns*, LIns*, LIns*);
+        LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual);
+        LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
+        LIns* insCall(const CallInfo *call, LIns* args[]);
+        LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr);
+        LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr);
+
+        // These functions provide control over CSE in the face of control
+        // flow. A suspend()/resume() pair may be put around a synthetic
+        // control flow diamond, preventing the inserted label from resetting
+        // the CSE state. A suspend() call must be dominated by a resume()
+        // call, else incorrect code could result.
+        void suspend() { suspended = true; }
+        void resume()  { suspended = false; }
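+
+        // For example (an illustrative sketch; 'cse' names this CseFilter in
+        // the writer pipeline):
+        //
+        //     cse->suspend();
+        //     // ... emit the branch, both arms and the join label of the
+        //     //     synthetic diamond ...
+        //     cse->resume();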
+    };
+
+    class LirBuffer
+    {
+    public:
+        LirBuffer(Allocator& alloc);
+        void clear();
+        uintptr_t makeRoom(size_t szB);     // make room for an instruction
+
+        debug_only (void validate() const;)
+        verbose_only(LInsPrinter* printer;)
+
+        int32_t insCount();
+
+        // stats
+        struct
+        {
+            uint32_t lir;   // # instructions
+        }
+        _stats;
+
+        AbiKind abi;
+        LIns *state, *param1, *sp, *rp;
+        LIns* savedRegs[NumSavedRegs+1];    // Allocate an extra element in case NumSavedRegs == 0
+
+        /** Each chunk is just a raw area of LIns instances, with no header
+            and no more than 8-byte alignment. The chunk size is somewhat arbitrary. */
+        static const size_t CHUNK_SZB = 8000;
+
+    protected:
+        friend class LirBufWriter;
+
+        /** Get CHUNK_SZB more memory for LIR instructions. */
+        void chunkAlloc();
+        void moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk);
+
+        Allocator&  _allocator;
+        uintptr_t   _unused;    // next unused instruction slot in the current LIR chunk
+        uintptr_t   _limit;     // one past the last usable byte of the current LIR chunk
+    };
+
+    class LirBufWriter : public LirWriter
+    {
+        LirBuffer*    _buf;     // underlying buffer housing the instructions
+        const Config& _config;
+
+    public:
+        LirBufWriter(LirBuffer* buf, const Config& config)
+            : LirWriter(0), _buf(buf), _config(config) {
+        }
+
+        // LirWriter interface
+        LIns* insLoad(LOpcode op, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual);
+        LIns* insStore(LOpcode op, LIns* o1, LIns* o2, int32_t disp, AccSet accSet);
+        LIns* ins0(LOpcode op);
+        LIns* ins1(LOpcode op, LIns* o1);
+        LIns* ins2(LOpcode op, LIns* o1, LIns* o2);
+        LIns* ins3(LOpcode op, LIns* o1, LIns* o2, LIns* o3);
+        LIns* insParam(int32_t i, int32_t kind);
+        LIns* insImmI(int32_t imm);
+#ifdef NANOJIT_64BIT
+        LIns* insImmQ(uint64_t imm);
+#endif
+        LIns* insImmD(double d);
+        LIns* insCall(const CallInfo *call, LIns* args[]);
+        LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr);
+        LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr);
+        LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
+        LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
+        LIns* insAlloc(int32_t size);
+        LIns* insJtbl(LIns* index, uint32_t size);
+        LIns* insComment(const char* str);
+    };
+
+    class LirFilter
+    {
+    public:
+        LirFilter *in;
+        LirFilter(LirFilter *in) : in(in) {}
+        virtual ~LirFilter(){}
+
+        // It's crucial that, once this reaches the LIR_start at the beginning
+        // of the buffer, it just keeps returning that LIR_start LIns on any
+        // subsequent calls.
+        virtual LIns* read() {
+            return in->read();
+        }
+        virtual LIns* finalIns() {
+            return in->finalIns();
+        }
+    };
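+
+    // Reading a fragment's LIR backwards (a sketch -- it assumes the fragment
+    // records its final instruction in a 'lastIns' field):
+    //
+    //     LirReader reader(frag->lastIns);
+    //     for (LIns* ins = reader.read(); !ins->isop(LIR_start); ins = reader.read()) {
+    //         // ... inspect 'ins' ...
+    //     }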
+
+    // concrete
+    class LirReader : public LirFilter
+    {
+        LIns* _ins;         // next instruction to be read; invariant: is never a skip
+        LIns* _finalIns;    // final instruction in the stream; ie. the first one to be read
+
+    public:
+        LirReader(LIns* ins) : LirFilter(0), _ins(ins), _finalIns(ins)
+        {
+            // The last instruction for a fragment shouldn't be a skip.
+            // (Actually, if the last *inserted* instruction exactly fills up
+            // a chunk, a new chunk will be created, and thus the last *written*
+            // instruction will be a skip -- the one needed for the
+            // cross-chunk link. But the last *inserted* instruction is what
+            // is recorded and used to initialise each LirReader, and that is
+            // what is seen here, and therefore this assertion holds.)
+            NanoAssert(ins && !ins->isop(LIR_skip));
+        }
+        virtual ~LirReader() {}
+
+        // Returns next instruction and advances to the prior instruction.
+        // Invariant: never returns a skip.
+        LIns* read();
+
+        LIns* finalIns() {
+            return _finalIns;
+        }
+    };
+
+    verbose_only(void live(LirFilter* in, Allocator& alloc, Fragment* frag, LogControl*);)
+
+    // WARNING: StackFilter assumes that all stack entries are eight bytes.
+    // Some of its optimisations aren't valid if that isn't true. See
+    // StackFilter::read() for more details.
+    class StackFilter: public LirFilter
+    {
+        LIns* sp;
+        BitSet stk;
+        int top;
+        int getTop(LIns* br);
+
+    public:
+        StackFilter(LirFilter *in, Allocator& alloc, LIns* sp);
+        LIns* read();
+    };
+
+    // This type is used to perform a simple interval analysis of 32-bit
+    // add/sub/mul. It lets us avoid overflow checks in some cases.
+    struct Interval
+    {
+        // The bounds are 64-bit integers so that any overflow from a 32-bit
+        // operation can be safely detected.
+        //
+        // If 'hasOverflowed' is false, 'lo' and 'hi' must be in the range
+        // I32_MIN..I32_MAX. If 'hasOverflowed' is true, 'lo' and 'hi' should
+        // not be trusted (and in debug builds we set them both to a special
+        // value UNTRUSTWORTHY that is outside the I32_MIN..I32_MAX range to
+        // facilitate sanity checking).
+        //
+        int64_t lo;
+        int64_t hi;
+        bool hasOverflowed;
+
+        static const int64_t I32_MIN = int64_t(int32_t(0x80000000));
+        static const int64_t I32_MAX = int64_t(int32_t(0x7fffffff));
+
+#ifdef DEBUG
+        static const int64_t UNTRUSTWORTHY = int64_t(0xdeafdeadbeeffeedLL);
+
+        bool isSane() {
+            return (hasOverflowed && lo == UNTRUSTWORTHY && hi == UNTRUSTWORTHY) ||
+                   (!hasOverflowed && lo <= hi && I32_MIN <= lo && hi <= I32_MAX);
+        }
+#endif
+
+        Interval(int64_t lo_, int64_t hi_) {
+            if (lo_ < I32_MIN || I32_MAX < hi_) {
+                hasOverflowed = true;
+#ifdef DEBUG
+                lo = UNTRUSTWORTHY;
+                hi = UNTRUSTWORTHY;
+#endif
+            } else {
+                hasOverflowed = false;
+                lo = lo_;
+                hi = hi_;
+            }
+            NanoAssert(isSane());
+        }
+
+        static Interval OverflowInterval() {
+            Interval interval(0, 0);
+#ifdef DEBUG
+            interval.lo = UNTRUSTWORTHY;
+            interval.hi = UNTRUSTWORTHY;
+#endif
+            interval.hasOverflowed = true;
+            return interval;
+        }
+
+        static Interval of(LIns* ins, int32_t lim);
+
+        static Interval add(Interval x, Interval y);
+        static Interval sub(Interval x, Interval y);
+        static Interval mul(Interval x, Interval y);
+
+        bool canBeZero() {
+            NanoAssert(isSane());
+            return hasOverflowed || (lo <= 0 && 0 <= hi);
+        }
+
+        bool canBeNegative() {
+            NanoAssert(isSane());
+            return hasOverflowed || (lo < 0);
+        }
+    };
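+
+    // A worked example (sketch; the arithmetic itself lives in LIR.cpp, but
+    // add() presumably combines the bounds elementwise): with x = [0, 10] and
+    // y = [-5, 5], Interval::add(x, y) yields [-5, 15]. That range stays
+    // within I32_MIN..I32_MAX, so hasOverflowed remains false and a 32-bit
+    // add of values with those bounds needs no overflow check. By contrast,
+    // adding [1, I32_MAX] to itself would exceed I32_MAX, so the result is an
+    // overflowed interval (cf. OverflowInterval()).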
+
+#if NJ_SOFTFLOAT_SUPPORTED
+    struct SoftFloatOps
+    {
+        const CallInfo* opmap[LIR_sentinel];
+        SoftFloatOps();
+    };
+
+    extern const SoftFloatOps softFloatOps;
+
+    // Replaces fpu ops with function calls, for platforms lacking float
+    // hardware (eg. some ARM machines).
+    class SoftFloatFilter: public LirWriter
+    {
+    public:
+        static const CallInfo* opmap[LIR_sentinel];
+
+        SoftFloatFilter(LirWriter *out);
+        LIns *split(LIns *a);
+        LIns *split(const CallInfo *call, LIns* args[]);
+        LIns *callD1(const CallInfo *call, LIns *a);
+        LIns *callD2(const CallInfo *call, LIns *a, LIns *b);
+        LIns *callI1(const CallInfo *call, LIns *a);
+        LIns *cmpD(const CallInfo *call, LIns *a, LIns *b);
+        LIns *ins1(LOpcode op, LIns *a);
+        LIns *ins2(LOpcode op, LIns *a, LIns *b);
+        LIns *insCall(const CallInfo *ci, LIns* args[]);
+    };
+#endif
+
+#ifdef DEBUG
+    // This class does thorough checking of LIR. It checks *implicit* LIR
+    // instructions, ie. LIR instructions specified via arguments -- to
+    // methods like insLoad() -- that have not yet been converted into
+    // *explicit* LIns objects in a LirBuffer. The reason for this is that if
+    // we wait until the LIR instructions are explicit, they will have gone
+    // through the entire writer pipeline and been optimised. By checking
+    // implicit LIR instructions we can check the LIR code at the start of the
+    // writer pipeline, exactly as it is generated by the compiler front-end.
+    //
+    // A general note about the errors produced by this class: for
+    // TraceMonkey, they won't include special names for instructions that
+    // have them unless TMFLAGS is specified.
+    class ValidateWriter : public LirWriter
+    {
+    private:
+        LInsPrinter* printer;
+        const char* whereInPipeline;
+
+        const char* type2string(LTy type);
+        void typeCheckArgs(LOpcode op, int nArgs, LTy formals[], LIns* args[]);
+        void errorStructureShouldBe(LOpcode op, const char* argDesc, int argN, LIns* arg,
+                                    const char* shouldBeDesc);
+        void errorAccSet(const char* what, AccSet accSet, const char* shouldDesc);
+        void errorLoadQual(const char* what, LoadQual loadQual);
+        void checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2);
+        void checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins);
+        void checkLInsIsNull(LOpcode op, int argN, LIns* ins);
+        void checkAccSet(LOpcode op, LIns* base, int32_t disp, AccSet accSet);  // defined by the embedder
+
+        // These can be set by the embedder and used in checkAccSet().
+        void** checkAccSetExtras;
+
+    public:
+        ValidateWriter(LirWriter* out, LInsPrinter* printer, const char* where);
+        void setCheckAccSetExtras(void** extras) { checkAccSetExtras = extras; }
+
+        LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual);
+        LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
+        LIns* ins0(LOpcode v);
+        LIns* ins1(LOpcode v, LIns* a);
+        LIns* ins2(LOpcode v, LIns* a, LIns* b);
+        LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
+        LIns* insParam(int32_t arg, int32_t kind);
+        LIns* insImmI(int32_t imm);
+#ifdef NANOJIT_64BIT
+        LIns* insImmQ(uint64_t imm);
+#endif
+        LIns* insImmD(double d);
+        LIns* insCall(const CallInfo *call, LIns* args[]);
+        LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr);
+        LIns* insGuardXov(LOpcode v, LIns* a, LIns* b, GuardRecord* gr);
+        LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
+        LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
+        LIns* insAlloc(int32_t size);
+        LIns* insJtbl(LIns* index, uint32_t size);
+    };
+
+    // This just checks things that aren't possible to check in
+    // ValidateWriter, eg. whether all branch targets are set and are labels.
+    class ValidateReader: public LirFilter {
+    public:
+        ValidateReader(LirFilter* in);
+        LIns* read();
+    };
+#endif
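+
+    // How the pieces above might be assembled into a writer pipeline (a
+    // hypothetical sketch -- the stage order and 'where' strings are
+    // illustrative, not prescribed by this header). Each filter's 'out' is
+    // the next stage down, so the front-end writes into the outermost filter:
+    //
+    //     LirWriter* w = new (alloc) LirBufWriter(lirbuf, config);
+    //     #ifdef DEBUG
+    //     w = new (alloc) ValidateWriter(w, lirbuf->printer, "end of writer pipeline");
+    //     #endif
+    //     CseFilter* cse = new (alloc) CseFilter(w, embNumUsedAccs, alloc);
+    //     if (!cse->initOOM)
+    //         w = cse;            // skip CSE if its tables failed to allocate
+    //     w = new (alloc) ExprFilter(w);
+    //     // ... the front-end then calls w->ins2(), w->insCall(), etc. ...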
+
+#ifdef NJ_VERBOSE
+    /* A listing filter for LIR, going through backwards. It merely
+       passes its input to its output, but notes it down too. When
+       finish() is called, prints out what went through. It is intended
+       to be used to print arbitrary intermediate transformation stages
+       of LIR. */
+    class ReverseLister : public LirFilter
+    {
+        Allocator&   _alloc;
+        LInsPrinter* _printer;
+        const char*  _title;
+        StringList   _strs;
+        LogControl*  _logc;
+        LIns*        _prevIns;
+    public:
+        ReverseLister(LirFilter* in, Allocator& alloc,
+                      LInsPrinter* printer, LogControl* logc, const char* title)
+            : LirFilter(in)
+            , _alloc(alloc)
+            , _printer(printer)
+            , _title(title)
+            , _strs(alloc)
+            , _logc(logc)
+            , _prevIns(NULL)
+        { }
+
+        void finish();
+        LIns* read();
+    };
+#endif
+
+}
+#endif // __nanojit_LIR__