Diffstat (limited to 'js/src/nanojit/LIR.h')
-rw-r--r--  js/src/nanojit/LIR.h  2443
1 file changed, 2443 insertions, 0 deletions
diff --git a/js/src/nanojit/LIR.h b/js/src/nanojit/LIR.h
new file mode 100644
index 0000000..4d6f03f
--- /dev/null
+++ b/js/src/nanojit/LIR.h
@@ -0,0 +1,2443 @@
+/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef __nanojit_LIR__
+#define __nanojit_LIR__
+
+namespace nanojit
+{
+ enum LOpcode
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+#pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum
+ : unsigned
+#endif
+ {
+#define OP___(op, number, repKind, retType, isCse) \
+ LIR_##op = (number),
+#include "LIRopcode.tbl"
+ LIR_sentinel,
+#undef OP___
+
+#ifdef NANOJIT_64BIT
+# define PTR_SIZE(a,b) b
+#else
+# define PTR_SIZE(a,b) a
+#endif
+
+ // Pointer-sized synonyms.
+
+ LIR_paramp = PTR_SIZE(LIR_parami, LIR_paramq),
+
+ LIR_retp = PTR_SIZE(LIR_reti, LIR_retq),
+
+ LIR_livep = PTR_SIZE(LIR_livei, LIR_liveq),
+
+ LIR_ldp = PTR_SIZE(LIR_ldi, LIR_ldq),
+
+ LIR_stp = PTR_SIZE(LIR_sti, LIR_stq),
+
+ LIR_callp = PTR_SIZE(LIR_calli, LIR_callq),
+
+ LIR_eqp = PTR_SIZE(LIR_eqi, LIR_eqq),
+ LIR_ltp = PTR_SIZE(LIR_lti, LIR_ltq),
+ LIR_gtp = PTR_SIZE(LIR_gti, LIR_gtq),
+ LIR_lep = PTR_SIZE(LIR_lei, LIR_leq),
+ LIR_gep = PTR_SIZE(LIR_gei, LIR_geq),
+ LIR_ltup = PTR_SIZE(LIR_ltui, LIR_ltuq),
+ LIR_gtup = PTR_SIZE(LIR_gtui, LIR_gtuq),
+ LIR_leup = PTR_SIZE(LIR_leui, LIR_leuq),
+ LIR_geup = PTR_SIZE(LIR_geui, LIR_geuq),
+
+ LIR_addp = PTR_SIZE(LIR_addi, LIR_addq),
+ LIR_subp = PTR_SIZE(LIR_subi, LIR_subq),
+ LIR_addjovp = PTR_SIZE(LIR_addjovi, LIR_addjovq),
+
+ LIR_andp = PTR_SIZE(LIR_andi, LIR_andq),
+ LIR_orp = PTR_SIZE(LIR_ori, LIR_orq),
+ LIR_xorp = PTR_SIZE(LIR_xori, LIR_xorq),
+
+ LIR_lshp = PTR_SIZE(LIR_lshi, LIR_lshq),
+ LIR_rshp = PTR_SIZE(LIR_rshi, LIR_rshq),
+ LIR_rshup = PTR_SIZE(LIR_rshui, LIR_rshuq),
+
+ LIR_cmovp = PTR_SIZE(LIR_cmovi, LIR_cmovq)
+ };
+
+ // 32-bit integer comparisons must be contiguous, as must 64-bit integer
+ // comparisons and 64-bit float comparisons.
+ NanoStaticAssert(LIR_eqi + 1 == LIR_lti &&
+ LIR_eqi + 2 == LIR_gti &&
+ LIR_eqi + 3 == LIR_lei &&
+ LIR_eqi + 4 == LIR_gei &&
+ LIR_eqi + 5 == LIR_ltui &&
+ LIR_eqi + 6 == LIR_gtui &&
+ LIR_eqi + 7 == LIR_leui &&
+ LIR_eqi + 8 == LIR_geui);
+#ifdef NANOJIT_64BIT
+ NanoStaticAssert(LIR_eqq + 1 == LIR_ltq &&
+ LIR_eqq + 2 == LIR_gtq &&
+ LIR_eqq + 3 == LIR_leq &&
+ LIR_eqq + 4 == LIR_geq &&
+ LIR_eqq + 5 == LIR_ltuq &&
+ LIR_eqq + 6 == LIR_gtuq &&
+ LIR_eqq + 7 == LIR_leuq &&
+ LIR_eqq + 8 == LIR_geuq);
+#endif
+ NanoStaticAssert(LIR_eqd + 1 == LIR_ltd &&
+ LIR_eqd + 2 == LIR_gtd &&
+ LIR_eqd + 3 == LIR_led &&
+ LIR_eqd + 4 == LIR_ged);
+
+ // Various opcodes must be changeable to their opposite with op^1
+ // (although we use invertXyz() when possible, ie. outside static
+ // assertions).
+ NanoStaticAssert((LIR_jt^1) == LIR_jf && (LIR_jf^1) == LIR_jt);
+
+ NanoStaticAssert((LIR_xt^1) == LIR_xf && (LIR_xf^1) == LIR_xt);
+
+ NanoStaticAssert((LIR_lti^1) == LIR_gti && (LIR_gti^1) == LIR_lti);
+ NanoStaticAssert((LIR_lei^1) == LIR_gei && (LIR_gei^1) == LIR_lei);
+ NanoStaticAssert((LIR_ltui^1) == LIR_gtui && (LIR_gtui^1) == LIR_ltui);
+ NanoStaticAssert((LIR_leui^1) == LIR_geui && (LIR_geui^1) == LIR_leui);
+
+#ifdef NANOJIT_64BIT
+ NanoStaticAssert((LIR_ltq^1) == LIR_gtq && (LIR_gtq^1) == LIR_ltq);
+ NanoStaticAssert((LIR_leq^1) == LIR_geq && (LIR_geq^1) == LIR_leq);
+ NanoStaticAssert((LIR_ltuq^1) == LIR_gtuq && (LIR_gtuq^1) == LIR_ltuq);
+ NanoStaticAssert((LIR_leuq^1) == LIR_geuq && (LIR_geuq^1) == LIR_leuq);
+#endif
+
+ NanoStaticAssert((LIR_ltd^1) == LIR_gtd && (LIR_gtd^1) == LIR_ltd);
+ NanoStaticAssert((LIR_led^1) == LIR_ged && (LIR_ged^1) == LIR_led);
+
+
+ struct GuardRecord;
+ struct SideExit;
+
+ enum AbiKind {
+ ABI_FASTCALL,
+ ABI_THISCALL,
+ ABI_STDCALL,
+ ABI_CDECL
+ };
+
+ // This is much the same as LTy, but we need to distinguish signed and
+    // unsigned 32-bit ints so that they will be extended to 64 bits correctly
+ // on 64-bit platforms.
+ //
+ // All values must fit into three bits. See CallInfo for details.
+ enum ArgType {
+ ARGTYPE_V = 0, // void
+ ARGTYPE_I = 1, // int32_t
+ ARGTYPE_UI = 2, // uint32_t
+#ifdef NANOJIT_64BIT
+ ARGTYPE_Q = 3, // uint64_t
+#endif
+ ARGTYPE_D = 4, // double
+
+ // aliases
+ ARGTYPE_P = PTR_SIZE(ARGTYPE_I, ARGTYPE_Q), // pointer
+ ARGTYPE_B = ARGTYPE_I // bool
+ };
+
+ enum IndirectCall {
+ CALL_INDIRECT = 0
+ };
+
+ //-----------------------------------------------------------------------
+ // Aliasing
+ // --------
+ // *Aliasing* occurs when a single memory location can be accessed through
+ // multiple names. For example, consider this code:
+ //
+ // ld a[0]
+ // sti b[0]
+ // ld a[0]
+ //
+ // In general, it's possible that a[0] and b[0] may refer to the same
+ // memory location. This means, for example, that you cannot safely
+ // perform CSE on the two loads. However, if you know that 'a' cannot be
+ // an alias of 'b' (ie. the two loads do not alias with the store) then
+ // you can safely perform CSE.
+ //
+ // Access regions
+ // --------------
+ // Doing alias analysis precisely is difficult. But it turns out that
+ // keeping track of aliasing at a coarse level is enough to help with many
+ // optimisations. So we conceptually divide the memory that is accessible
+ // from LIR into a small number of "access regions" (aka. "Acc"). An
+ // access region may be non-contiguous. No two access regions can
+ // overlap. The union of all access regions covers all memory accessible
+ // from LIR.
+ //
+ // In general a (static) load or store may be executed more than once, and
+ // thus may access multiple regions; however, in practice almost all
+ // loads and stores will obviously access only a single region. A
+ // function called from LIR may load and/or store multiple access regions
+ // (even if executed only once).
+ //
+ // If two loads/stores/calls are known to not access the same region(s),
+ // then they do not alias.
+ //
+ // All regions are defined by the embedding. It makes sense to add new
+ // embedding-specific access regions when doing so will help with one or
+ // more optimisations.
+ //
+ // Access region sets and instruction markings
+ // -------------------------------------------
+ // Each load/store is marked with an "access region set" (aka. "AccSet"),
+ // which is a set of one or more access regions. This indicates which
+ // parts of LIR-accessible memory the load/store may touch.
+ //
+ // Each function called from LIR is also marked with an access region set
+ // for memory stored to by the function. (We could also have a marking
+ // for memory loads done by the function, but there's no need at the
+ // moment.) These markings apply to the function itself, not the call
+ // site, ie. they're not context-sensitive.
+ //
+ // These load/store/call markings MUST BE ACCURATE -- if not then invalid
+ // optimisations might occur that change the meaning of the code.
+ // However, they can safely be imprecise (ie. conservative), ie. a
+ // load/store/call can be marked with an access region set that is a
+ // superset of the actual access region set. Such imprecision is safe but
+ // may reduce optimisation opportunities.
+ //
+ // Optimisations that use access region info
+ // -----------------------------------------
+ // Currently only CseFilter uses this, and only for determining whether
+ // loads can be CSE'd. Note that CseFilter treats loads that are marked
+ // with a single access region precisely, but all loads marked with
+ // multiple access regions get lumped together. So if you can't mark a
+ // load with a single access region, you might as well use ACC_LOAD_ANY.
+ //-----------------------------------------------------------------------
+
+ // An access region set is represented as a bitset. Using a uint32_t
+ // restricts us to at most 32 alias regions for the moment. This could be
+ // expanded to a uint64_t easily if needed.
+ typedef uint32_t AccSet;
+ static const int NUM_ACCS = sizeof(AccSet) * 8;
+
+ // Some common (non-singleton) access region sets. ACCSET_NONE does not make
+    // sense for loads or stores (which must access at least one region); it
+    // only makes sense for calls.
+ //
+ static const AccSet ACCSET_NONE = 0x0;
+ static const AccSet ACCSET_ALL = 0xffffffff;
+ static const AccSet ACCSET_LOAD_ANY = ACCSET_ALL; // synonym
+ static const AccSet ACCSET_STORE_ANY = ACCSET_ALL; // synonym
+
+ inline bool isSingletonAccSet(AccSet accSet) {
+ // This is a neat way of testing if a value has only one bit set.
+ return (accSet & (accSet - 1)) == 0;
+ }
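+
+    // For example (using hypothetical single-bit regions an embedder might
+    // define): 0x10 & 0x0f == 0, so 0x10 is a singleton, whereas
+    // 0x18 & 0x17 == 0x10 != 0, so 0x18 (two regions) is not.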
+
+ // Full AccSets don't fit into load and store instructions. But
+ // load/store AccSets almost always contain a single access region. We
+ // take advantage of this to create a compressed AccSet, MiniAccSet, that
+ // does fit.
+ //
+ // The 32 single-region AccSets get compressed into a number in the range
+ // 0..31 (according to the position of the set bit), and all other
+ // (multi-region) AccSets get converted into MINI_ACCSET_MULTIPLE. So the
+ // representation is lossy in the latter case, but that case is rare for
+ // loads/stores. We use a full AccSet for the storeAccSets of calls, for
+ // which multi-region AccSets are common.
+ //
+    // We wrap the uint8_t inside a struct to avoid the possibility of subtle
+ // bugs caused by mixing up AccSet and MiniAccSet, which is easy to do.
+ // However, the struct gets padded inside LInsLd in an inconsistent way on
+ // Windows, so we actually store a MiniAccSetVal inside LInsLd. Sigh.
+ // But we use MiniAccSet everywhere else.
+ //
+ typedef uint8_t MiniAccSetVal;
+ struct MiniAccSet { MiniAccSetVal val; };
+ static const MiniAccSet MINI_ACCSET_MULTIPLE = { 99 };
+
+ static MiniAccSet compressAccSet(AccSet accSet) {
+ if (isSingletonAccSet(accSet)) {
+ MiniAccSet ret = { uint8_t(msbSet32(accSet)) };
+ return ret;
+ }
+
+ // If we got here, it must be a multi-region AccSet.
+ return MINI_ACCSET_MULTIPLE;
+ }
+
+ static AccSet decompressMiniAccSet(MiniAccSet miniAccSet) {
+ return (miniAccSet.val == MINI_ACCSET_MULTIPLE.val) ? ACCSET_ALL : (1 << miniAccSet.val);
+ }
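+
+    // Round-trip behaviour of the two functions above, as a sketch:
+    //
+    //   compressAccSet(1 << 4).val == 4, and decompressing that gives (1 << 4);
+    //   compressAccSet(0x3).val == MINI_ACCSET_MULTIPLE.val, and decompressing
+    //     that gives (conservatively) ACCSET_ALL.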
+
+ // The LoadQual affects how a load can be optimised:
+ //
+ // - CONST: These loads are guaranteed to always return the same value
+ // during a single execution of a fragment (but the value is allowed to
+ // change between executions of the fragment). This means that the
+ // location is never stored to by the LIR, and is never modified by an
+ // external entity while the fragment is running.
+ //
+ // - NORMAL: These loads may be stored to by the LIR, but are never
+ // modified by an external entity while the fragment is running.
+ //
+ // - VOLATILE: These loads may be stored to by the LIR, and may be
+ // modified by an external entity while the fragment is running.
+ //
+ // This gives a lattice with the ordering: CONST < NORMAL < VOLATILE.
+ // As usual, it's safe to mark a load with a value higher (less precise)
+    // than actual, but it may result in fewer optimisations occurring.
+ //
+ // Generally CONST loads are highly amenable to optimisation (eg. CSE),
+ // VOLATILE loads are entirely unoptimisable, and NORMAL loads are in
+ // between and require some alias analysis to optimise.
+ //
+    // Note that CONST has a stronger meaning than "const" in C and C++; in C
+ // and C++ a "const" variable may be modified by an external entity, such
+ // as hardware. Hence "const volatile" makes sense in C and C++, but
+ // CONST+VOLATILE doesn't make sense in LIR.
+ //
+ // Note also that a 2-bit bitfield in LInsLd is used to hold LoadQual
+    // values, so you can add one more value without expanding it.
+ //
+ enum LoadQual {
+ LOAD_CONST = 0,
+ LOAD_NORMAL = 1,
+ LOAD_VOLATILE = 2
+ };
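+
+    // A rough sketch of how a writer might pick a qualifier ('lir', 'base',
+    // 'accSet' and the displacements are hypothetical; insLoad() is declared
+    // on LirWriter further below):
+    //
+    //   lir->insLoad(LIR_ldi, base, 0, accSet, LOAD_CONST);    // never stored to while the fragment runs
+    //   lir->insLoad(LIR_ldi, base, 4, accSet, LOAD_NORMAL);   // may be stored to by LIR only
+    //   lir->insLoad(LIR_ldi, base, 8, accSet, LOAD_VOLATILE); // may also be modified externally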
+
+ struct CallInfo
+ {
+ private:
+ // In CallInfo::_typesig, each entry is three bits.
+ static const int TYPESIG_FIELDSZB = 3;
+ static const int TYPESIG_FIELDMASK = 7;
+
+ public:
+ uintptr_t _address;
+        uint32_t _typesig:27; // 9 3-bit fields indicating arg type, by ARGTYPE above (including ret type), high bits to low: a1 a2 a3 a4 a5 a6 a7 a8 ret
+ AbiKind _abi:3;
+ uint32_t _isPure:1; // _isPure=1 means no side-effects, result only depends on args
+ AccSet _storeAccSet; // access regions stored by the function
+ verbose_only ( const char* _name; )
+
+ // The following encode 'r func()' through to 'r func(a1, a2, a3, a4, a5, a6, a7, a8)'.
+ static inline uint32_t typeSig0(ArgType r) {
+ return r;
+ }
+ static inline uint32_t typeSig1(ArgType r, ArgType a1) {
+ return a1 << TYPESIG_FIELDSZB*1 | typeSig0(r);
+ }
+ static inline uint32_t typeSig2(ArgType r, ArgType a1, ArgType a2) {
+ return a1 << TYPESIG_FIELDSZB*2 | typeSig1(r, a2);
+ }
+ static inline uint32_t typeSig3(ArgType r, ArgType a1, ArgType a2, ArgType a3) {
+ return a1 << TYPESIG_FIELDSZB*3 | typeSig2(r, a2, a3);
+ }
+ static inline uint32_t typeSig4(ArgType r, ArgType a1, ArgType a2, ArgType a3, ArgType a4) {
+ return a1 << TYPESIG_FIELDSZB*4 | typeSig3(r, a2, a3, a4);
+ }
+ static inline uint32_t typeSig5(ArgType r, ArgType a1, ArgType a2, ArgType a3,
+ ArgType a4, ArgType a5) {
+ return a1 << TYPESIG_FIELDSZB*5 | typeSig4(r, a2, a3, a4, a5);
+ }
+ static inline uint32_t typeSig6(ArgType r, ArgType a1, ArgType a2, ArgType a3,
+ ArgType a4, ArgType a5, ArgType a6) {
+ return a1 << TYPESIG_FIELDSZB*6 | typeSig5(r, a2, a3, a4, a5, a6);
+ }
+ static inline uint32_t typeSig7(ArgType r, ArgType a1, ArgType a2, ArgType a3,
+ ArgType a4, ArgType a5, ArgType a6, ArgType a7) {
+ return a1 << TYPESIG_FIELDSZB*7 | typeSig6(r, a2, a3, a4, a5, a6, a7);
+ }
+ static inline uint32_t typeSig8(ArgType r, ArgType a1, ArgType a2, ArgType a3, ArgType a4,
+ ArgType a5, ArgType a6, ArgType a7, ArgType a8) {
+ return a1 << TYPESIG_FIELDSZB*8 | typeSig7(r, a2, a3, a4, a5, a6, a7, a8);
+ }
+        // Encode 'r func(a1, ..., aN)'
+ static inline uint32_t typeSigN(ArgType r, int N, ArgType a[]) {
+ uint32_t typesig = r;
+ for (int i = 0; i < N; i++) {
+ typesig |= a[i] << TYPESIG_FIELDSZB*(N-i);
+ }
+ return typesig;
+ }
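+
+        // For example, a call signature 'int32_t f(double, int32_t)' encodes
+        // (a sketch using the functions above) as:
+        //
+        //   typeSig2(ARGTYPE_I, ARGTYPE_D, ARGTYPE_I)
+        //     == (ARGTYPE_D << 6) | (ARGTYPE_I << 3) | ARGTYPE_I
+        //     == (4 << 6) | (1 << 3) | 1
+        //     == 0x109
+        //
+        // ie. the return type sits in the low 3 bits, the rightmost arg in the
+        // next 3 bits, and the leftmost arg in the 3 bits above that.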
+
+ uint32_t count_args() const;
+ uint32_t count_int32_args() const;
+        // Nb: uses right-to-left order, eg. types[0] is the type of the right-most arg.
+ // XXX: See bug 525815 for fixing this.
+ uint32_t getArgTypes(ArgType* types) const;
+
+ inline ArgType returnType() const {
+ return ArgType(_typesig & TYPESIG_FIELDMASK);
+ }
+
+ inline bool isIndirect() const {
+ return _address < 256;
+ }
+ };
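+
+    // A sketch of how an embedder might fill in a CallInfo for a C helper
+    // function ('js_add1' and 'ci_add1' are hypothetical names; the field
+    // order follows the struct above, and the name field is only present in
+    // verbose builds):
+    //
+    //   static int32_t js_add1(int32_t x) { return x + 1; }
+    //
+    //   static const CallInfo ci_add1 = {
+    //       (uintptr_t)&js_add1,
+    //       CallInfo::typeSig1(ARGTYPE_I, ARGTYPE_I),  // 'int32_t f(int32_t)'
+    //       ABI_CDECL,
+    //       /*_isPure*/1,                              // no side-effects
+    //       ACCSET_NONE                                // stores to no access regions
+    //       verbose_only( , "js_add1" )
+    //   };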
+
+ /*
+ * Record for extra data used to compile switches as jump tables.
+ */
+ struct SwitchInfo
+ {
+ NIns** table; // Jump table; a jump address is NIns*
+ uint32_t count; // Number of table entries
+ // Index value at last execution of the switch. The index value
+ // is the offset into the jump table. Thus it is computed as
+ // (switch expression) - (lowest case value).
+ uint32_t index;
+ };
+
+ // Array holding the 'isCse' field from LIRopcode.tbl.
+ extern const int8_t isCses[]; // cannot be uint8_t, some values are negative
+
+ inline bool isCseOpcode(LOpcode op) {
+ NanoAssert(isCses[op] != -1); // see LIRopcode.tbl to understand this
+ return isCses[op] == 1;
+ }
+ inline bool isLiveOpcode(LOpcode op) {
+ return
+#if defined NANOJIT_64BIT
+ op == LIR_liveq ||
+#endif
+ op == LIR_livei || op == LIR_lived;
+ }
+ inline bool isRetOpcode(LOpcode op) {
+ return
+#if defined NANOJIT_64BIT
+ op == LIR_retq ||
+#endif
+ op == LIR_reti || op == LIR_retd;
+ }
+ inline bool isCmovOpcode(LOpcode op) {
+ return
+#if defined NANOJIT_64BIT
+ op == LIR_cmovq ||
+#endif
+ op == LIR_cmovi ||
+ op == LIR_cmovd;
+ }
+ inline bool isCmpIOpcode(LOpcode op) {
+ return LIR_eqi <= op && op <= LIR_geui;
+ }
+ inline bool isCmpSIOpcode(LOpcode op) {
+ return LIR_eqi <= op && op <= LIR_gei;
+ }
+ inline bool isCmpUIOpcode(LOpcode op) {
+ return LIR_eqi == op || (LIR_ltui <= op && op <= LIR_geui);
+ }
+#ifdef NANOJIT_64BIT
+ inline bool isCmpQOpcode(LOpcode op) {
+ return LIR_eqq <= op && op <= LIR_geuq;
+ }
+ inline bool isCmpSQOpcode(LOpcode op) {
+ return LIR_eqq <= op && op <= LIR_geq;
+ }
+ inline bool isCmpUQOpcode(LOpcode op) {
+ return LIR_eqq == op || (LIR_ltuq <= op && op <= LIR_geuq);
+ }
+#endif
+ inline bool isCmpDOpcode(LOpcode op) {
+ return LIR_eqd <= op && op <= LIR_ged;
+ }
+ inline bool isCmpOpcode(LOpcode op) {
+ return isCmpIOpcode(op) ||
+#if defined NANOJIT_64BIT
+ isCmpQOpcode(op) ||
+#endif
+ isCmpDOpcode(op);
+ }
+
+ inline LOpcode invertCondJmpOpcode(LOpcode op) {
+ NanoAssert(op == LIR_jt || op == LIR_jf);
+ return LOpcode(op ^ 1);
+ }
+ inline LOpcode invertCondGuardOpcode(LOpcode op) {
+ NanoAssert(op == LIR_xt || op == LIR_xf);
+ return LOpcode(op ^ 1);
+ }
+ inline LOpcode invertCmpOpcode(LOpcode op) {
+ NanoAssert(isCmpOpcode(op));
+ return LOpcode(op ^ 1);
+ }
+
+ inline LOpcode getCallOpcode(const CallInfo* ci) {
+ LOpcode op = LIR_callp;
+ switch (ci->returnType()) {
+ case ARGTYPE_V: op = LIR_callv; break;
+ case ARGTYPE_I:
+ case ARGTYPE_UI: op = LIR_calli; break;
+#ifdef NANOJIT_64BIT
+ case ARGTYPE_Q: op = LIR_callq; break;
+#endif
+ case ARGTYPE_D: op = LIR_calld; break;
+ default: NanoAssert(0); break;
+ }
+ return op;
+ }
+
+ LOpcode arithOpcodeD2I(LOpcode op);
+#ifdef NANOJIT_64BIT
+ LOpcode cmpOpcodeI2Q(LOpcode op);
+#endif
+ LOpcode cmpOpcodeD2I(LOpcode op);
+ LOpcode cmpOpcodeD2UI(LOpcode op);
+
+ // Array holding the 'repKind' field from LIRopcode.tbl.
+ extern const uint8_t repKinds[];
+
+ enum LTy {
+ LTy_V, // void: no value/no type
+ LTy_I, // int: 32-bit integer
+#ifdef NANOJIT_64BIT
+ LTy_Q, // quad: 64-bit integer
+#endif
+ LTy_D, // double: 64-bit float
+
+ LTy_P = PTR_SIZE(LTy_I, LTy_Q) // word-sized integer
+ };
+
+ // Array holding the 'retType' field from LIRopcode.tbl.
+ extern const LTy retTypes[];
+
+ inline RegisterMask rmask(Register r)
+ {
+ return RegisterMask(1) << REGNUM(r);
+ }
+
+ //-----------------------------------------------------------------------
+ // Low-level instructions. This is a bit complicated, because we have a
+ // variable-width representation to minimise space usage.
+ //
+ // - Instruction size is always an integral multiple of word size.
+ //
+ // - Every instruction has at least one word, holding the opcode and the
+ // reservation info ("SharedFields"). That word is in class LIns.
+ //
+ // - Beyond that, most instructions have 1, 2 or 3 extra words. These
+ // extra words are in classes LInsOp1, LInsOp2, etc (collectively called
+ // "LInsXYZ" in what follows). Each LInsXYZ class also contains an LIns,
+ // accessible by the 'ins' member, which holds the LIns data.
+ //
+ // - LIR is written forward, but read backwards. When reading backwards,
+    //   in order to find the opcode, it must be at a predictable place in the
+    //   LInsXYZ that isn't affected by instruction width. Therefore, the LIns
+ // word (which contains the opcode) is always the *last* word in an
+ // instruction.
+ //
+ // - Each instruction is created by casting pre-allocated bytes from a
+ // LirBuffer to the LInsXYZ type. Therefore there are no constructors
+ // for LIns or LInsXYZ.
+ //
+ // - The standard handle for an instruction is a LIns*. This actually
+ // points to the LIns word, ie. to the final word in the instruction.
+ // This is a bit odd, but it allows the instruction's opcode to be
+ // easily accessed. Once you've looked at the opcode and know what kind
+ // of instruction it is, if you want to access any of the other words,
+ // you need to use toLInsXYZ(), which takes the LIns* and gives you an
+ // LInsXYZ*, ie. the pointer to the actual start of the instruction's
+ // bytes. From there you can access the instruction-specific extra
+ // words.
+ //
+ // - However, from outside class LIns, LInsXYZ isn't visible, nor is
+ // toLInsXYZ() -- from outside LIns, all LIR instructions are handled
+ // via LIns pointers and get/set methods are used for all LIns/LInsXYZ
+ // accesses. In fact, all data members in LInsXYZ are private and can
+ // only be accessed by LIns, which is a friend class. The only thing
+ // anyone outside LIns can do with a LInsXYZ is call getLIns().
+ //
+ // - An example Op2 instruction and the likely pointers to it (each line
+ // represents a word, and pointers to a line point to the start of the
+ // word on that line):
+ //
+ // [ oprnd_2 <-- LInsOp2* insOp2 == toLInsOp2(ins)
+ // oprnd_1
+ // opcode + resv ] <-- LIns* ins
+ //
+ // - LIR_skip instructions are used to link code chunks. If the first
+ // instruction on a chunk isn't a LIR_start, it will be a skip, and the
+ // skip's operand will point to the last LIns on the preceding chunk.
+ // LInsSk has the same layout as LInsOp1, but we represent it as a
+ // different class because there are some places where we treat
+ // skips specially and so having it separate seems like a good idea.
+ //
+ // - Various things about the size and layout of LIns and LInsXYZ are
+ // statically checked in staticSanityCheck(). In particular, this is
+ // worthwhile because there's nothing that guarantees that all the
+ // LInsXYZ classes have a size that is a multiple of word size (but in
+ // practice all sane compilers use a layout that results in this). We
+ // also check that every LInsXYZ is word-aligned in
+ // LirBuffer::makeRoom(); this seems sensible to avoid potential
+ // slowdowns due to misalignment. It relies on chunks themselves being
+ // word-aligned, which is extremely likely.
+ //
+ // - There is an enum, LInsRepKind, with one member for each of the
+ // LInsXYZ kinds. Each opcode is categorised with its LInsRepKind value
+ // in LIRopcode.tbl, and this is used in various places.
+ //-----------------------------------------------------------------------
+
+ enum LInsRepKind {
+ // LRK_XYZ corresponds to class LInsXYZ.
+ LRK_Op0,
+ LRK_Op1,
+ LRK_Op2,
+ LRK_Op3,
+ LRK_Ld,
+ LRK_St,
+ LRK_Sk,
+ LRK_C,
+ LRK_P,
+ LRK_I,
+ LRK_QorD,
+ LRK_Jtbl,
+ LRK_None // this one is used for unused opcode numbers
+ };
+
+ class LInsOp0;
+ class LInsOp1;
+ class LInsOp2;
+ class LInsOp3;
+ class LInsLd;
+ class LInsSt;
+ class LInsSk;
+ class LInsC;
+ class LInsP;
+ class LInsI;
+ class LInsQorD;
+ class LInsJtbl;
+
+ class LIns
+ {
+ private:
+ // SharedFields: fields shared by all LIns kinds.
+ //
+ // The .inReg, .regnum, .inAr and .arIndex fields form a "reservation"
+ // that is used temporarily during assembly to record information
+ // relating to register allocation. See class RegAlloc for more
+ // details. Note: all combinations of .inReg/.inAr are possible, ie.
+ // 0/0, 0/1, 1/0, 1/1.
+ //
+ // The .isResultLive field is only used for instructions that return
+ // results. It indicates if the result is live. It's set (if
+ // appropriate) and used only during the codegen pass.
+ //
+ struct SharedFields {
+ uint32_t inReg:1; // if 1, 'reg' is active
+ uint32_t regnum:7;
+ uint32_t inAr:1; // if 1, 'arIndex' is active
+ uint32_t isResultLive:1; // if 1, the instruction's result is live
+
+ uint32_t arIndex:14; // index into stack frame; displ is -4*arIndex
+
+ LOpcode opcode:8; // instruction's opcode
+ };
+
+ union {
+ SharedFields sharedFields;
+ // Force sizeof(LIns)==8 and 8-byte alignment on 64-bit machines.
+ // This is necessary because sizeof(SharedFields)==4 and we want all
+ // instances of LIns to be pointer-aligned.
+ void* wholeWord;
+ };
+
+ inline void initSharedFields(LOpcode opcode)
+ {
+            // We must zero .inReg, .inAr and .isResultLive, but zeroing the
+ // whole word is easier. Then we set the opcode.
+ wholeWord = 0;
+ sharedFields.opcode = opcode;
+ }
+
+ // LIns-to-LInsXYZ converters.
+ inline LInsOp0* toLInsOp0() const;
+ inline LInsOp1* toLInsOp1() const;
+ inline LInsOp2* toLInsOp2() const;
+ inline LInsOp3* toLInsOp3() const;
+ inline LInsLd* toLInsLd() const;
+ inline LInsSt* toLInsSt() const;
+ inline LInsSk* toLInsSk() const;
+ inline LInsC* toLInsC() const;
+ inline LInsP* toLInsP() const;
+ inline LInsI* toLInsI() const;
+ inline LInsQorD* toLInsQorD() const;
+        inline LInsJtbl* toLInsJtbl() const;
+
+ void staticSanityCheck();
+
+ public:
+ // LIns initializers.
+ inline void initLInsOp0(LOpcode opcode);
+ inline void initLInsOp1(LOpcode opcode, LIns* oprnd1);
+ inline void initLInsOp2(LOpcode opcode, LIns* oprnd1, LIns* oprnd2);
+ inline void initLInsOp3(LOpcode opcode, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3);
+ inline void initLInsLd(LOpcode opcode, LIns* val, int32_t d, AccSet accSet, LoadQual loadQual);
+ inline void initLInsSt(LOpcode opcode, LIns* val, LIns* base, int32_t d, AccSet accSet);
+ inline void initLInsSk(LIns* prevLIns);
+ // Nb: args[] must be allocated and initialised before being passed in;
+ // initLInsC() just copies the pointer into the LInsC.
+ inline void initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci);
+ inline void initLInsP(int32_t arg, int32_t kind);
+ inline void initLInsI(LOpcode opcode, int32_t immI);
+ inline void initLInsQorD(LOpcode opcode, uint64_t immQorD);
+ inline void initLInsJtbl(LIns* index, uint32_t size, LIns** table);
+
+ LOpcode opcode() const { return sharedFields.opcode; }
+
+ // Generally, void instructions (statements) are always live and
+ // non-void instructions (expressions) are live if used by another
+ // live instruction. But there are some trickier cases.
+ // Any non-void instruction can be marked isResultLive=1 even
+ // when it is unreachable, e.g. due to an always-taken branch.
+ // The assembler marks it live if it sees any uses, regardless of
+ // whether those uses are in reachable code or not.
+ bool isLive() const {
+ return isV() ||
+ sharedFields.isResultLive ||
+ (isCall() && !callInfo()->_isPure) || // impure calls are always live
+ isop(LIR_paramp); // LIR_paramp is always live
+ }
+ void setResultLive() {
+ NanoAssert(!isV());
+ sharedFields.isResultLive = 1;
+ }
+
+ // XXX: old reservation manipulating functions. See bug 538924.
+ // Replacement strategy:
+ // - deprecated_markAsClear() --> clearReg() and/or clearArIndex()
+ // - deprecated_hasKnownReg() --> isInReg()
+ // - deprecated_getReg() --> getReg() after checking isInReg()
+ //
+ void deprecated_markAsClear() {
+ sharedFields.inReg = 0;
+ sharedFields.inAr = 0;
+ }
+ bool deprecated_hasKnownReg() {
+ NanoAssert(isExtant());
+ return isInReg();
+ }
+ Register deprecated_getReg() {
+ NanoAssert(isExtant());
+ if (isInReg()) {
+ Register r = { sharedFields.regnum };
+ return r;
+ } else {
+ return deprecated_UnknownReg;
+ }
+ }
+ uint32_t deprecated_getArIndex() {
+ NanoAssert(isExtant());
+ return ( isInAr() ? sharedFields.arIndex : 0 );
+ }
+
+ // Reservation manipulation.
+ //
+ // "Extant" mean "in existence, still existing, surviving". In other
+ // words, has the value been computed explicitly (not folded into
+ // something else) and is it still available (in a register or spill
+ // slot) for use?
+ bool isExtant() {
+ return isInReg() || isInAr();
+ }
+ bool isInReg() {
+ return sharedFields.inReg;
+ }
+ bool isInRegMask(RegisterMask allow) {
+ return isInReg() && (rmask(getReg()) & allow);
+ }
+ Register getReg() {
+ NanoAssert(isInReg());
+ Register r = { sharedFields.regnum };
+ return r;
+ }
+ void setReg(Register r) {
+ sharedFields.inReg = 1;
+ sharedFields.regnum = REGNUM(r);
+ }
+ void clearReg() {
+ sharedFields.inReg = 0;
+ }
+ bool isInAr() {
+ return sharedFields.inAr;
+ }
+ uint32_t getArIndex() {
+ NanoAssert(isInAr());
+ return sharedFields.arIndex;
+ }
+ void setArIndex(uint32_t arIndex) {
+ sharedFields.inAr = 1;
+ sharedFields.arIndex = arIndex;
+ }
+ void clearArIndex() {
+ sharedFields.inAr = 0;
+ }
+
+ // For various instruction kinds.
+ inline LIns* oprnd1() const;
+ inline LIns* oprnd2() const;
+ inline LIns* oprnd3() const;
+
+ // For branches.
+ inline LIns* getTarget() const;
+ inline void setTarget(LIns* label);
+
+ // For guards.
+ inline GuardRecord* record() const;
+
+ // For loads.
+ inline LoadQual loadQual() const;
+
+ // For loads/stores.
+ inline int32_t disp() const;
+ inline MiniAccSet miniAccSet() const;
+ inline AccSet accSet() const;
+
+ // For LInsSk.
+ inline LIns* prevLIns() const;
+
+ // For LInsP.
+ inline uint8_t paramArg() const;
+ inline uint8_t paramKind() const;
+
+ // For LInsI.
+ inline int32_t immI() const;
+
+ // For LInsQorD.
+#ifdef NANOJIT_64BIT
+ inline int32_t immQlo() const;
+ inline uint64_t immQ() const;
+#endif
+ inline int32_t immDlo() const;
+ inline int32_t immDhi() const;
+ inline double immD() const;
+ inline uint64_t immDasQ() const;
+
+ // For LIR_allocp.
+ inline int32_t size() const;
+ inline void setSize(int32_t nbytes);
+
+ // For LInsC.
+ inline LIns* arg(uint32_t i) const; // right-to-left-order: arg(0) is rightmost
+ inline uint32_t argc() const;
+ inline LIns* callArgN(uint32_t n) const;
+ inline const CallInfo* callInfo() const;
+
+ // For LIR_jtbl
+ inline uint32_t getTableSize() const;
+ inline LIns* getTarget(uint32_t index) const;
+ inline void setTarget(uint32_t index, LIns* label) const;
+
+ // isLInsXYZ() returns true if the instruction has the LInsXYZ form.
+ // Note that there is some overlap with other predicates, eg.
+ // isStore()==isLInsSt(), isCall()==isLInsC(), but that's ok; these
+ // ones are used mostly to check that opcodes are appropriate for
+        // instruction layouts; the others are used for non-debugging
+ // purposes.
+ bool isLInsOp0() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_Op0 == repKinds[opcode()];
+ }
+ bool isLInsOp1() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_Op1 == repKinds[opcode()];
+ }
+ bool isLInsOp2() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_Op2 == repKinds[opcode()];
+ }
+ bool isLInsOp3() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_Op3 == repKinds[opcode()];
+ }
+ bool isLInsLd() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_Ld == repKinds[opcode()];
+ }
+ bool isLInsSt() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_St == repKinds[opcode()];
+ }
+ bool isLInsSk() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_Sk == repKinds[opcode()];
+ }
+ bool isLInsC() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_C == repKinds[opcode()];
+ }
+ bool isLInsP() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_P == repKinds[opcode()];
+ }
+ bool isLInsI() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_I == repKinds[opcode()];
+ }
+ bool isLInsQorD() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_QorD == repKinds[opcode()];
+ }
+ bool isLInsJtbl() const {
+ NanoAssert(LRK_None != repKinds[opcode()]);
+ return LRK_Jtbl == repKinds[opcode()];
+ }
+
+ // LIns predicates.
+ bool isop(LOpcode o) const {
+ return opcode() == o;
+ }
+ bool isRet() const {
+ return isRetOpcode(opcode());
+ }
+ bool isCmp() const {
+ return isCmpOpcode(opcode());
+ }
+ bool isCall() const {
+ return isop(LIR_callv) ||
+ isop(LIR_calli) ||
+#if defined NANOJIT_64BIT
+ isop(LIR_callq) ||
+#endif
+ isop(LIR_calld);
+ }
+ bool isCmov() const {
+ return isCmovOpcode(opcode());
+ }
+ bool isStore() const {
+ return isLInsSt();
+ }
+ bool isLoad() const {
+ return isLInsLd();
+ }
+ bool isGuard() const {
+ return isop(LIR_x) || isop(LIR_xf) || isop(LIR_xt) ||
+ isop(LIR_xbarrier) || isop(LIR_xtbl) ||
+ isop(LIR_addxovi) || isop(LIR_subxovi) || isop(LIR_mulxovi);
+ }
+ bool isJov() const {
+ return
+#ifdef NANOJIT_64BIT
+ isop(LIR_addjovq) || isop(LIR_subjovq) ||
+#endif
+ isop(LIR_addjovi) || isop(LIR_subjovi) || isop(LIR_muljovi);
+ }
+ // True if the instruction is a 32-bit integer immediate.
+ bool isImmI() const {
+ return isop(LIR_immi);
+ }
+ // True if the instruction is a 32-bit integer immediate and
+ // has the value 'val' when treated as a 32-bit signed integer.
+ bool isImmI(int32_t val) const {
+ return isImmI() && immI()==val;
+ }
+#ifdef NANOJIT_64BIT
+ // True if the instruction is a 64-bit integer immediate.
+ bool isImmQ() const {
+ return isop(LIR_immq);
+ }
+#endif
+ // True if the instruction is a pointer-sized integer immediate.
+ bool isImmP() const
+ {
+#ifdef NANOJIT_64BIT
+ return isImmQ();
+#else
+ return isImmI();
+#endif
+ }
+ // True if the instruction is a 64-bit float immediate.
+ bool isImmD() const {
+ return isop(LIR_immd);
+ }
+ // True if the instruction is a 64-bit integer or float immediate.
+ bool isImmQorD() const {
+ return
+#ifdef NANOJIT_64BIT
+ isImmQ() ||
+#endif
+ isImmD();
+ }
+        // True if the instruction is an immediate of any type.
+ bool isImmAny() const {
+ return isImmI() || isImmQorD();
+ }
+
+ bool isBranch() const {
+ return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl) || isJov();
+ }
+
+ LTy retType() const {
+ return retTypes[opcode()];
+ }
+ bool isV() const {
+ return retType() == LTy_V;
+ }
+ bool isI() const {
+ return retType() == LTy_I;
+ }
+#ifdef NANOJIT_64BIT
+ bool isQ() const {
+ return retType() == LTy_Q;
+ }
+#endif
+ bool isD() const {
+ return retType() == LTy_D;
+ }
+ bool isQorD() const {
+ return
+#ifdef NANOJIT_64BIT
+ isQ() ||
+#endif
+ isD();
+ }
+ bool isP() const {
+#ifdef NANOJIT_64BIT
+ return isQ();
+#else
+ return isI();
+#endif
+ }
+
+ inline void* immP() const
+ {
+ #ifdef NANOJIT_64BIT
+ return (void*)immQ();
+ #else
+ return (void*)immI();
+ #endif
+ }
+ };
+
+ typedef SeqBuilder<LIns*> InsList;
+ typedef SeqBuilder<char*> StringList;
+
+
+ // 0-operand form. Used for LIR_start and LIR_label.
+ class LInsOp0
+ {
+ private:
+ friend class LIns;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // 1-operand form. Used for LIR_reti, unary arithmetic/logic ops, etc.
+ class LInsOp1
+ {
+ private:
+ friend class LIns;
+
+ LIns* oprnd_1;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // 2-operand form. Used for guards, branches, comparisons, binary
+ // arithmetic/logic ops, etc.
+ class LInsOp2
+ {
+ private:
+ friend class LIns;
+
+ LIns* oprnd_2;
+
+ LIns* oprnd_1;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // 3-operand form. Used for conditional moves, jov branches, and xov guards.
+ class LInsOp3
+ {
+ private:
+ friend class LIns;
+
+ LIns* oprnd_3;
+
+ LIns* oprnd_2;
+
+ LIns* oprnd_1;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // Used for all loads.
+ class LInsLd
+ {
+ private:
+ friend class LIns;
+
+ // Nb: the LIR writer pipeline handles things if a displacement
+ // exceeds 16 bits. This is rare, but does happen occasionally. We
+ // could go to 24 bits but then it would happen so rarely that the
+ // handler code would be difficult to test and thus untrustworthy.
+ //
+ // Nb: the types of these bitfields are all 32-bit integers to ensure
+ // they are fully packed on Windows, sigh. Also, 'loadQual' is
+ // unsigned to ensure the values 0, 1, and 2 all fit in 2 bits.
+ //
+        // Nb: an explicit 'signed' keyword for bitfield types is required;
+ // some compilers may treat them as unsigned without it.
+ // See Bugzilla 584219 comment #18
+ signed int disp:16;
+ signed int miniAccSetVal:8;
+ uint32_t loadQual:2;
+
+ LIns* oprnd_1;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // Used for all stores.
+ class LInsSt
+ {
+ private:
+ friend class LIns;
+
+ int16_t disp;
+ MiniAccSetVal miniAccSetVal;
+
+ LIns* oprnd_2;
+
+ LIns* oprnd_1;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // Used for LIR_skip.
+ class LInsSk
+ {
+ private:
+ friend class LIns;
+
+ LIns* prevLIns;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // Used for all variants of LIR_call.
+ class LInsC
+ {
+ private:
+ friend class LIns;
+
+ // Arguments in reverse order, just like insCall() (ie. args[0] holds
+ // the rightmost arg). The array should be allocated by the same
+ // allocator as the LIR buffers, because it has the same lifetime.
+ LIns** args;
+
+ const CallInfo* ci;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // Used for LIR_paramp.
+ class LInsP
+ {
+ private:
+ friend class LIns;
+
+ uintptr_t arg:8;
+ uintptr_t kind:8;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // Used for LIR_immi and LIR_allocp.
+ class LInsI
+ {
+ private:
+ friend class LIns;
+
+ int32_t immI;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // Used for LIR_immq and LIR_immd.
+ class LInsQorD
+ {
+ private:
+ friend class LIns;
+
+ int32_t immQorDlo;
+
+ int32_t immQorDhi;
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; };
+ };
+
+ // Used for LIR_jtbl. 'oprnd_1' must be a uint32_t index in
+ // the range 0 <= index < size; no range check is performed.
+ // 'table' is an array of labels.
+ class LInsJtbl
+ {
+ private:
+ friend class LIns;
+
+ uint32_t size; // number of entries in table
+ LIns** table; // pointer to table[size] with same lifetime as this LInsJtbl
+ LIns* oprnd_1; // uint32_t index expression
+
+ LIns ins;
+
+ public:
+ LIns* getLIns() { return &ins; }
+ };
+
+ // Used only as a placeholder for OP___ macros for unused opcodes in
+ // LIRopcode.tbl.
+ class LInsNone
+ {
+ };
+
+ LInsOp0* LIns::toLInsOp0() const { return (LInsOp0* )(uintptr_t(this+1) - sizeof(LInsOp0 )); }
+ LInsOp1* LIns::toLInsOp1() const { return (LInsOp1* )(uintptr_t(this+1) - sizeof(LInsOp1 )); }
+ LInsOp2* LIns::toLInsOp2() const { return (LInsOp2* )(uintptr_t(this+1) - sizeof(LInsOp2 )); }
+ LInsOp3* LIns::toLInsOp3() const { return (LInsOp3* )(uintptr_t(this+1) - sizeof(LInsOp3 )); }
+ LInsLd* LIns::toLInsLd() const { return (LInsLd* )(uintptr_t(this+1) - sizeof(LInsLd )); }
+ LInsSt* LIns::toLInsSt() const { return (LInsSt* )(uintptr_t(this+1) - sizeof(LInsSt )); }
+ LInsSk* LIns::toLInsSk() const { return (LInsSk* )(uintptr_t(this+1) - sizeof(LInsSk )); }
+ LInsC* LIns::toLInsC() const { return (LInsC* )(uintptr_t(this+1) - sizeof(LInsC )); }
+ LInsP* LIns::toLInsP() const { return (LInsP* )(uintptr_t(this+1) - sizeof(LInsP )); }
+ LInsI* LIns::toLInsI() const { return (LInsI* )(uintptr_t(this+1) - sizeof(LInsI )); }
+ LInsQorD* LIns::toLInsQorD() const { return (LInsQorD*)(uintptr_t(this+1) - sizeof(LInsQorD)); }
+ LInsJtbl* LIns::toLInsJtbl() const { return (LInsJtbl*)(uintptr_t(this+1) - sizeof(LInsJtbl)); }
+
+ void LIns::initLInsOp0(LOpcode opcode) {
+ initSharedFields(opcode);
+ NanoAssert(isLInsOp0());
+ }
+ void LIns::initLInsOp1(LOpcode opcode, LIns* oprnd1) {
+ initSharedFields(opcode);
+ toLInsOp1()->oprnd_1 = oprnd1;
+ NanoAssert(isLInsOp1());
+ }
+ void LIns::initLInsOp2(LOpcode opcode, LIns* oprnd1, LIns* oprnd2) {
+ initSharedFields(opcode);
+ toLInsOp2()->oprnd_1 = oprnd1;
+ toLInsOp2()->oprnd_2 = oprnd2;
+ NanoAssert(isLInsOp2());
+ }
+ void LIns::initLInsOp3(LOpcode opcode, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3) {
+ initSharedFields(opcode);
+ toLInsOp3()->oprnd_1 = oprnd1;
+ toLInsOp3()->oprnd_2 = oprnd2;
+ toLInsOp3()->oprnd_3 = oprnd3;
+ NanoAssert(isLInsOp3());
+ }
+ void LIns::initLInsLd(LOpcode opcode, LIns* val, int32_t d, AccSet accSet, LoadQual loadQual) {
+ initSharedFields(opcode);
+ toLInsLd()->oprnd_1 = val;
+ NanoAssert(d == int16_t(d));
+ toLInsLd()->disp = int16_t(d);
+ toLInsLd()->miniAccSetVal = compressAccSet(accSet).val;
+ toLInsLd()->loadQual = loadQual;
+ NanoAssert(isLInsLd());
+ }
+ void LIns::initLInsSt(LOpcode opcode, LIns* val, LIns* base, int32_t d, AccSet accSet) {
+ initSharedFields(opcode);
+ toLInsSt()->oprnd_1 = val;
+ toLInsSt()->oprnd_2 = base;
+ NanoAssert(d == int16_t(d));
+ toLInsSt()->disp = int16_t(d);
+ toLInsSt()->miniAccSetVal = compressAccSet(accSet).val;
+ NanoAssert(isLInsSt());
+ }
+ void LIns::initLInsSk(LIns* prevLIns) {
+ initSharedFields(LIR_skip);
+ toLInsSk()->prevLIns = prevLIns;
+ NanoAssert(isLInsSk());
+ }
+ void LIns::initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci) {
+ initSharedFields(opcode);
+ toLInsC()->args = args;
+ toLInsC()->ci = ci;
+ NanoAssert(isLInsC());
+ }
+ void LIns::initLInsP(int32_t arg, int32_t kind) {
+ initSharedFields(LIR_paramp);
+ NanoAssert(isU8(arg) && isU8(kind));
+ toLInsP()->arg = arg;
+ toLInsP()->kind = kind;
+ NanoAssert(isLInsP());
+ }
+ void LIns::initLInsI(LOpcode opcode, int32_t immI) {
+ initSharedFields(opcode);
+ toLInsI()->immI = immI;
+ NanoAssert(isLInsI());
+ }
+ void LIns::initLInsQorD(LOpcode opcode, uint64_t immQorD) {
+ initSharedFields(opcode);
+ toLInsQorD()->immQorDlo = int32_t(immQorD);
+ toLInsQorD()->immQorDhi = int32_t(immQorD >> 32);
+ NanoAssert(isLInsQorD());
+ }
+ void LIns::initLInsJtbl(LIns* index, uint32_t size, LIns** table) {
+ initSharedFields(LIR_jtbl);
+ toLInsJtbl()->oprnd_1 = index;
+ toLInsJtbl()->table = table;
+ toLInsJtbl()->size = size;
+ NanoAssert(isLInsJtbl());
+ }
+
+ LIns* LIns::oprnd1() const {
+ NanoAssert(isLInsOp1() || isLInsOp2() || isLInsOp3() || isLInsLd() || isLInsSt() || isLInsJtbl());
+ return toLInsOp2()->oprnd_1;
+ }
+ LIns* LIns::oprnd2() const {
+ NanoAssert(isLInsOp2() || isLInsOp3() || isLInsSt());
+ return toLInsOp2()->oprnd_2;
+ }
+ LIns* LIns::oprnd3() const {
+ NanoAssert(isLInsOp3());
+ return toLInsOp3()->oprnd_3;
+ }
+
+ LIns* LIns::getTarget() const {
+ NanoAssert(isBranch() && !isop(LIR_jtbl));
+ if (isJov())
+ return oprnd3();
+ else
+ return oprnd2();
+ }
+
+ void LIns::setTarget(LIns* label) {
+ NanoAssert(label && label->isop(LIR_label));
+ NanoAssert(isBranch() && !isop(LIR_jtbl));
+ if (isJov())
+ toLInsOp3()->oprnd_3 = label;
+ else
+ toLInsOp2()->oprnd_2 = label;
+ }
+
+ LIns* LIns::getTarget(uint32_t index) const {
+ NanoAssert(isop(LIR_jtbl));
+ NanoAssert(index < toLInsJtbl()->size);
+ return toLInsJtbl()->table[index];
+ }
+
+ void LIns::setTarget(uint32_t index, LIns* label) const {
+ NanoAssert(label && label->isop(LIR_label));
+ NanoAssert(isop(LIR_jtbl));
+ NanoAssert(index < toLInsJtbl()->size);
+ toLInsJtbl()->table[index] = label;
+ }
+
+ GuardRecord *LIns::record() const {
+ NanoAssert(isGuard());
+ switch (opcode()) {
+ case LIR_x:
+ case LIR_xt:
+ case LIR_xf:
+ case LIR_xtbl:
+ case LIR_xbarrier:
+ return (GuardRecord*)oprnd2();
+
+ case LIR_addxovi:
+ case LIR_subxovi:
+ case LIR_mulxovi:
+ return (GuardRecord*)oprnd3();
+
+ default:
+ NanoAssert(0);
+ return NULL;
+ }
+ }
+
+ LoadQual LIns::loadQual() const {
+ NanoAssert(isLInsLd());
+ return (LoadQual)toLInsLd()->loadQual;
+ }
+
+ int32_t LIns::disp() const {
+ if (isLInsSt()) {
+ return toLInsSt()->disp;
+ } else {
+ NanoAssert(isLInsLd());
+ return toLInsLd()->disp;
+ }
+ }
+
+ MiniAccSet LIns::miniAccSet() const {
+ MiniAccSet miniAccSet;
+ if (isLInsSt()) {
+ miniAccSet.val = toLInsSt()->miniAccSetVal;
+ } else {
+ NanoAssert(isLInsLd());
+ miniAccSet.val = toLInsLd()->miniAccSetVal;
+ }
+ return miniAccSet;
+ }
+
+ AccSet LIns::accSet() const {
+ return decompressMiniAccSet(miniAccSet());
+ }
+
+ LIns* LIns::prevLIns() const {
+ NanoAssert(isLInsSk());
+ return toLInsSk()->prevLIns;
+ }
+
+ inline uint8_t LIns::paramArg() const { NanoAssert(isop(LIR_paramp)); return toLInsP()->arg; }
+ inline uint8_t LIns::paramKind() const { NanoAssert(isop(LIR_paramp)); return toLInsP()->kind; }
+
+ inline int32_t LIns::immI() const { NanoAssert(isImmI()); return toLInsI()->immI; }
+
+#ifdef NANOJIT_64BIT
+ inline int32_t LIns::immQlo() const { NanoAssert(isImmQ()); return toLInsQorD()->immQorDlo; }
+ uint64_t LIns::immQ() const {
+ NanoAssert(isImmQ());
+ return (uint64_t(toLInsQorD()->immQorDhi) << 32) | uint32_t(toLInsQorD()->immQorDlo);
+ }
+#endif
+ inline int32_t LIns::immDlo() const { NanoAssert(isImmD()); return toLInsQorD()->immQorDlo; }
+ inline int32_t LIns::immDhi() const { NanoAssert(isImmD()); return toLInsQorD()->immQorDhi; }
+ double LIns::immD() const {
+ NanoAssert(isImmD());
+ union {
+ double f;
+ uint64_t q;
+ } u;
+ u.q = immDasQ();
+ return u.f;
+ }
+ uint64_t LIns::immDasQ() const {
+ NanoAssert(isImmD());
+ return (uint64_t(toLInsQorD()->immQorDhi) << 32) | uint32_t(toLInsQorD()->immQorDlo);
+ }
+
+ int32_t LIns::size() const {
+ NanoAssert(isop(LIR_allocp));
+ return toLInsI()->immI << 2;
+ }
+
+ void LIns::setSize(int32_t nbytes) {
+ NanoAssert(isop(LIR_allocp));
+ NanoAssert(nbytes > 0);
+ toLInsI()->immI = (nbytes+3)>>2; // # of required 32bit words
+ }
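+
+    // For example: setSize(10) stores (10+3)>>2 == 3 words, so a subsequent
+    // size() call returns 3 << 2 == 12, ie. the request rounded up to a
+    // multiple of 4 bytes.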
+
+ // Index args in reverse order, i.e. arg(0) returns the rightmost arg.
+ // Nb: this must be kept in sync with insCall().
+ LIns* LIns::arg(uint32_t i) const
+ {
+ NanoAssert(isCall());
+ NanoAssert(i < callInfo()->count_args());
+ return toLInsC()->args[i]; // args[] is in right-to-left order as well
+ }
+
+ uint32_t LIns::argc() const {
+ return callInfo()->count_args();
+ }
+
+ LIns* LIns::callArgN(uint32_t n) const
+ {
+ return arg(argc()-n-1);
+ }
+
+ const CallInfo* LIns::callInfo() const
+ {
+ NanoAssert(isCall());
+ return toLInsC()->ci;
+ }
+
+ uint32_t LIns::getTableSize() const
+ {
+ NanoAssert(isLInsJtbl());
+ return toLInsJtbl()->size;
+ }
+
+ class LirWriter
+ {
+ public:
+ LirWriter *out;
+
+ LirWriter(LirWriter* out)
+ : out(out) {}
+ virtual ~LirWriter() {}
+
+ virtual LIns* ins0(LOpcode v) {
+ return out->ins0(v);
+ }
+ virtual LIns* ins1(LOpcode v, LIns* a) {
+ return out->ins1(v, a);
+ }
+ virtual LIns* ins2(LOpcode v, LIns* a, LIns* b) {
+ return out->ins2(v, a, b);
+ }
+ virtual LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c) {
+ return out->ins3(v, a, b, c);
+ }
+ virtual LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr) {
+ return out->insGuard(v, c, gr);
+ }
+ virtual LIns* insGuardXov(LOpcode v, LIns *a, LIns* b, GuardRecord *gr) {
+ return out->insGuardXov(v, a, b, gr);
+ }
+ virtual LIns* insBranch(LOpcode v, LIns* condition, LIns* to) {
+ return out->insBranch(v, condition, to);
+ }
+ virtual LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
+ return out->insBranchJov(v, a, b, to);
+ }
+ // arg: 0=first, 1=second, ...
+ // kind: 0=arg 1=saved-reg
+ virtual LIns* insParam(int32_t arg, int32_t kind) {
+ return out->insParam(arg, kind);
+ }
+ virtual LIns* insImmI(int32_t imm) {
+ return out->insImmI(imm);
+ }
+#ifdef NANOJIT_64BIT
+ virtual LIns* insImmQ(uint64_t imm) {
+ return out->insImmQ(imm);
+ }
+#endif
+ virtual LIns* insImmD(double d) {
+ return out->insImmD(d);
+ }
+ virtual LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual) {
+ return out->insLoad(op, base, d, accSet, loadQual);
+ }
+ virtual LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet) {
+ return out->insStore(op, value, base, d, accSet);
+ }
+ // args[] is in reverse order, ie. args[0] holds the rightmost arg.
+ virtual LIns* insCall(const CallInfo *call, LIns* args[]) {
+ return out->insCall(call, args);
+ }
+ virtual LIns* insAlloc(int32_t size) {
+ NanoAssert(size != 0);
+ return out->insAlloc(size);
+ }
+ virtual LIns* insJtbl(LIns* index, uint32_t size) {
+ return out->insJtbl(index, size);
+ }
+ virtual LIns* insComment(const char* str) {
+ return out->insComment(str);
+ }
+
+ // convenience functions
+
+        // Inserts code that evaluates to 'iftrue' if 'cond' is true and to
+        // 'iffalse' otherwise; uses a conditional move if 'use_cmov' is set.
+ LIns* insChoose(LIns* cond, LIns* iftrue, LIns* iffalse, bool use_cmov);
+
+ // Inserts an integer comparison to 0
+ LIns* insEqI_0(LIns* oprnd1) {
+ return ins2ImmI(LIR_eqi, oprnd1, 0);
+ }
+
+ // Inserts a pointer comparison to 0
+ LIns* insEqP_0(LIns* oprnd1) {
+ return ins2(LIR_eqp, oprnd1, insImmWord(0));
+ }
+
+ // Inserts a binary operation where the second operand is an
+ // integer immediate.
+ LIns* ins2ImmI(LOpcode v, LIns* oprnd1, int32_t imm) {
+ return ins2(v, oprnd1, insImmI(imm));
+ }
+
+ LIns* insImmP(const void *ptr) {
+#ifdef NANOJIT_64BIT
+ return insImmQ((uint64_t)ptr);
+#else
+ return insImmI((int32_t)ptr);
+#endif
+ }
+
+ LIns* insImmWord(intptr_t value) {
+#ifdef NANOJIT_64BIT
+ return insImmQ(value);
+#else
+ return insImmI(value);
+#endif
+ }
+
+ // Sign-extend integers to native integers. On 32-bit this is a no-op.
+ LIns* insI2P(LIns* intIns) {
+#ifdef NANOJIT_64BIT
+ return ins1(LIR_i2q, intIns);
+#else
+ return intIns;
+#endif
+ }
+
+ // Zero-extend integers to native integers. On 32-bit this is a no-op.
+ LIns* insUI2P(LIns* uintIns) {
+ #ifdef NANOJIT_64BIT
+ return ins1(LIR_ui2uq, uintIns);
+ #else
+ return uintIns;
+ #endif
+ }
+
+ // Do a load with LoadQual==LOAD_NORMAL.
+ LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet) {
+ return insLoad(op, base, d, accSet, LOAD_NORMAL);
+ }
+
+ // Chooses LIR_sti, LIR_stq or LIR_std according to the type of 'value'.
+ LIns* insStore(LIns* value, LIns* base, int32_t d, AccSet accSet);
+ };
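+
+    // LirWriters are designed to be chained into a pipeline: each stage does
+    // its own work and then delegates to 'out'. A sketch of a typical setup
+    // (the base writer and the CseFilter constructor arguments shown here are
+    // assumptions; see the classes below and LIR.cpp for the exact signatures):
+    //
+    //   LirWriter* w = bufWriter;             // writes into a LirBuffer
+    //   w = new (alloc) CseFilter(w, ...);    // common subexpression elimination
+    //   w = new (alloc) ExprFilter(w);        // algebraic simplifications
+    //   LIns* sum = w->ins2(LIR_addi, w->insImmI(1), w->insImmI(2));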
+
+
+#ifdef NJ_VERBOSE
+ extern const char* lirNames[];
+
+ // Maps address ranges to meaningful names.
+ class AddrNameMap
+ {
+ Allocator& allocator;
+ class Entry
+ {
+ public:
+ Entry(int) : name(0), size(0), align(0) {}
+ Entry(char *n, size_t s, size_t a) : name(n), size(s), align(a) {}
+ char* name;
+ size_t size:29, align:3;
+ };
+ TreeMap<const void*, Entry*> names; // maps code regions to names
+ public:
+ AddrNameMap(Allocator& allocator);
+ void addAddrRange(const void *p, size_t size, size_t align, const char *name);
+ void lookupAddr(void *p, char*& name, int32_t& offset);
+ };
+
+ // Maps LIR instructions to meaningful names.
+ class LirNameMap
+ {
+ private:
+ Allocator& alloc;
+
+ // A small string-wrapper class, required because we need '==' to
+ // compare string contents, not string pointers, when strings are used
+ // as keys in CountMap.
+ struct Str {
+ Allocator& alloc;
+ char* s;
+
+ Str(Allocator& alloc_, const char* s_) : alloc(alloc_) {
+ s = new (alloc) char[1+strlen(s_)];
+ strcpy(s, s_);
+ }
+
+ bool operator==(const Str& str) const {
+ return (0 == strcmp(this->s, str.s));
+ }
+ };
+
+ // Similar to 'struct Str' -- we need to hash the string's contents,
+ // not its pointer.
+ template<class K> struct StrHash {
+ static size_t hash(const Str &k) {
+ // (const void*) cast is required by ARM RVCT 2.2
+ return murmurhash((const void*)k.s, strlen(k.s));
+ }
+ };
+
+ template <class Key, class H=DefaultHash<Key> >
+ class CountMap: public HashMap<Key, int, H> {
+ public:
+ CountMap(Allocator& alloc) : HashMap<Key, int, H>(alloc, 128) {}
+ int add(Key k) {
+ int c = 1;
+ if (this->containsKey(k)) {
+ c = 1+this->get(k);
+ }
+ this->put(k,c);
+ return c;
+ }
+ };
+
+ CountMap<int> lircounts;
+ CountMap<const CallInfo *> funccounts;
+ CountMap<Str, StrHash<Str> > namecounts;
+
+ void addNameWithSuffix(LIns* i, const char *s, int suffix, bool ignoreOneSuffix);
+
+ class Entry
+ {
+ public:
+ Entry(int) : name(0) {}
+ Entry(char* n) : name(n) {}
+ char* name;
+ };
+
+ HashMap<LIns*, Entry*> names;
+
+ public:
+ LirNameMap(Allocator& alloc)
+ : alloc(alloc),
+ lircounts(alloc),
+ funccounts(alloc),
+ namecounts(alloc),
+ names(alloc)
+ {}
+
+ void addName(LIns* ins, const char *s); // gives 'ins' a special name
+ const char* createName(LIns* ins); // gives 'ins' a generic name
+ const char* lookupName(LIns* ins);
+ };
+
+ // We use big buffers for cases where we need to fit a whole instruction,
+ // and smaller buffers for all the others. These should easily be long
+ // enough, but for safety the formatXyz() functions check and won't exceed
+ // those limits.
+ class InsBuf {
+ public:
+ static const size_t len = 1000;
+ char buf[len];
+ };
+ class RefBuf {
+ public:
+ static const size_t len = 200;
+ char buf[len];
+ };
+
+ class LInsPrinter
+ {
+ private:
+ Allocator& alloc;
+ const int EMB_NUM_USED_ACCS;
+
+ char *formatImmI(RefBuf* buf, int32_t c);
+#ifdef NANOJIT_64BIT
+ char *formatImmQ(RefBuf* buf, uint64_t c);
+#endif
+ char *formatImmD(RefBuf* buf, double c);
+ void formatGuard(InsBuf* buf, LIns* ins); // defined by the embedder
+ void formatGuardXov(InsBuf* buf, LIns* ins); // defined by the embedder
+
+ public:
+ static const char* accNames[]; // defined by the embedder
+
+ LInsPrinter(Allocator& alloc, int embNumUsedAccs)
+ : alloc(alloc), EMB_NUM_USED_ACCS(embNumUsedAccs)
+ {
+ addrNameMap = new (alloc) AddrNameMap(alloc);
+ lirNameMap = new (alloc) LirNameMap(alloc);
+ }
+
+ char *formatAddr(RefBuf* buf, void* p);
+ char *formatRef(RefBuf* buf, LIns* ref, bool showImmValue = true);
+ char *formatIns(InsBuf* buf, LIns* ins);
+ char *formatAccSet(RefBuf* buf, AccSet accSet);
+
+ AddrNameMap* addrNameMap;
+ LirNameMap* lirNameMap;
+ };
+
+
+ class VerboseWriter : public LirWriter
+ {
+ InsList code;
+ LInsPrinter* printer;
+ LogControl* logc;
+ const char* const prefix;
+ bool const always_flush;
+ public:
+ VerboseWriter(Allocator& alloc, LirWriter *out, LInsPrinter* printer, LogControl* logc,
+ const char* prefix = "", bool always_flush = false)
+ : LirWriter(out), code(alloc), printer(printer), logc(logc), prefix(prefix), always_flush(always_flush)
+ {}
+
+ LIns* add(LIns* i) {
+ if (i) {
+ code.add(i);
+ if (always_flush)
+ flush();
+ }
+ return i;
+ }
+
+ LIns* add_flush(LIns* i) {
+ if ((i = add(i)) != 0)
+ flush();
+ return i;
+ }
+
+ void flush()
+ {
+ if (!code.isEmpty()) {
+ InsBuf b;
+ for (Seq<LIns*>* p = code.get(); p != NULL; p = p->tail)
+ logc->printf("%s %s\n", prefix, printer->formatIns(&b, p->head));
+ code.clear();
+ }
+ }
+
+ LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr) {
+ return add_flush(out->insGuard(op,cond,gr));
+ }
+
+ LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr) {
+ return add(out->insGuardXov(op,a,b,gr));
+ }
+
+ LIns* insBranch(LOpcode v, LIns* condition, LIns* to) {
+ return add_flush(out->insBranch(v, condition, to));
+ }
+
+ LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
+ return add(out->insBranchJov(v, a, b, to));
+ }
+
+ LIns* insJtbl(LIns* index, uint32_t size) {
+ return add_flush(out->insJtbl(index, size));
+ }
+
+ LIns* ins0(LOpcode v) {
+ if (v == LIR_label || v == LIR_start) {
+ flush();
+ }
+ return add(out->ins0(v));
+ }
+
+ LIns* ins1(LOpcode v, LIns* a) {
+ return isRetOpcode(v) ? add_flush(out->ins1(v, a)) : add(out->ins1(v, a));
+ }
+ LIns* ins2(LOpcode v, LIns* a, LIns* b) {
+ return add(out->ins2(v, a, b));
+ }
+ LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c) {
+ return add(out->ins3(v, a, b, c));
+ }
+ LIns* insCall(const CallInfo *call, LIns* args[]) {
+ return add_flush(out->insCall(call, args));
+ }
+ LIns* insParam(int32_t i, int32_t kind) {
+ return add(out->insParam(i, kind));
+ }
+ LIns* insLoad(LOpcode v, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual) {
+ return add(out->insLoad(v, base, disp, accSet, loadQual));
+ }
+ LIns* insStore(LOpcode op, LIns* v, LIns* b, int32_t d, AccSet accSet) {
+ return add_flush(out->insStore(op, v, b, d, accSet));
+ }
+ LIns* insAlloc(int32_t size) {
+ return add(out->insAlloc(size));
+ }
+ LIns* insImmI(int32_t imm) {
+ return add(out->insImmI(imm));
+ }
+#ifdef NANOJIT_64BIT
+ LIns* insImmQ(uint64_t imm) {
+ return add(out->insImmQ(imm));
+ }
+#endif
+ LIns* insImmD(double d) {
+ return add(out->insImmD(d));
+ }
+
+ LIns* insComment(const char* str) {
+ return add_flush(out->insComment(str));
+ }
+ };
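+
+    // A minimal wiring sketch for the two classes above (illustrative only;
+    // assumes the embedding supplies an Allocator 'alloc', a LogControl
+    // 'logc', a downstream LirWriter 'w', and its used access-region count):
+    //
+    //     LInsPrinter* printer = new (alloc) LInsPrinter(alloc, embNumUsedAccs);
+    //     VerboseWriter* vw = new (alloc) VerboseWriter(alloc, w, printer, logc, "lir: ");
+    //     w = vw;   // each write is recorded and pretty-printed; calls, stores,
+    //               // branches, guards and returns force a flush to 'logc'.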
+
+#endif
+
+ class ExprFilter: public LirWriter
+ {
+ public:
+ ExprFilter(LirWriter *out) : LirWriter(out) {}
+ LIns* ins1(LOpcode v, LIns* a);
+ LIns* ins2(LOpcode v, LIns* a, LIns* b);
+ LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
+ LIns* insGuard(LOpcode, LIns* cond, GuardRecord *);
+ LIns* insGuardXov(LOpcode, LIns* a, LIns* b, GuardRecord *);
+ LIns* insBranch(LOpcode, LIns* cond, LIns* target);
+ LIns* insBranchJov(LOpcode, LIns* a, LIns* b, LIns* target);
+ LIns* insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet, LoadQual loadQual);
+ private:
+ LIns* simplifyOverflowArith(LOpcode op, LIns** opnd1, LIns** opnd2);
+ };
+
+ class CseFilter: public LirWriter
+ {
+ enum NLKind {
+ // We divide instruction kinds into groups. LIns0 isn't present
+            // because we don't need to record any 0-ary instructions.  Loads
+            // aren't here; they're handled separately.
+ NLImmISmall = 0,
+ NLImmILarge = 1,
+ NLImmQ = 2, // only occurs on 64-bit platforms
+ NLImmD = 3,
+ NL1 = 4,
+ NL2 = 5,
+ NL3 = 6,
+ NLCall = 7,
+
+ NLFirst = 0,
+ NLLast = 7,
+            // We need one value past "last" so that nextNLKind(NLLast) is
+            // still a valid enumerator (some compilers warn otherwise).
+ NLInvalid = 8
+ };
+ #define nextNLKind(kind) NLKind(kind+1)
+
+ // There is one table for each NLKind. This lets us size the lists
+ // appropriately (some instruction kinds are more common than others).
+ // It also lets us have NLKind-specific find/add/grow functions, which
+ // are faster than generic versions.
+ //
+ // Nb: m_listNL and m_capNL sizes must be a power of 2.
+ // Don't start m_capNL too small, or we'll waste time growing and rehashing.
+        // Don't start m_capNL too large, or we'll waste memory.
+ //
+ LIns** m_listNL[NLLast + 1];
+ uint32_t m_capNL[ NLLast + 1];
+ uint32_t m_usedNL[NLLast + 1];
+ typedef uint32_t (CseFilter::*find_t)(LIns*);
+ find_t m_findNL[NLLast + 1];
+
+ // Similarly, for loads, there is one table for each CseAcc. A CseAcc
+ // is like a normal access region, but there are two extra possible
+ // values: CSE_ACC_CONST, which is where we put all CONST-qualified
+ // loads, and CSE_ACC_MULTIPLE, where we put all multi-region loads.
+ // All remaining loads are single-region and go in the table entry for
+ // their region.
+ //
+ // This arrangement makes the removal of invalidated loads fast -- we
+ // can invalidate all loads from a single region by clearing that
+ // region's table.
+ //
+ typedef uint8_t CseAcc; // same type as MiniAccSet
+
+ static const uint8_t CSE_NUM_ACCS = NUM_ACCS + 2;
+
+ // These values would be 'static const' except they are defined in
+ // terms of EMB_NUM_USED_ACCS which is itself not 'static const'
+ // because it's passed in by the embedding.
+ const uint8_t EMB_NUM_USED_ACCS; // number of access regions used by the embedding
+ const uint8_t CSE_NUM_USED_ACCS; // EMB_NUM_USED_ACCS + 2
+ const CseAcc CSE_ACC_CONST; // EMB_NUM_USED_ACCS + 0
+ const CseAcc CSE_ACC_MULTIPLE; // EMB_NUM_USED_ACCS + 1
+
+ // We will only use CSE_NUM_USED_ACCS of these entries, ie. the
+ // number of lists allocated depends on the number of access regions
+ // in use by the embedding.
+ LIns** m_listL[CSE_NUM_ACCS];
+ uint32_t m_capL[ CSE_NUM_ACCS];
+ uint32_t m_usedL[CSE_NUM_ACCS];
+
+ AccSet storesSinceLastLoad; // regions stored to since the last load
+
+ Allocator& alloc;
+
+ // After a conditional guard such as "xf cmp", we know that 'cmp' must
+ // be true, else we would have side-exited. So if we see 'cmp' again
+ // we can treat it like a constant. This table records such
+ // comparisons.
+ HashMap <LIns*, bool> knownCmpValues;
+
+ // If true, we will not add new instructions to the CSE tables, but we
+ // will continue to CSE instructions that match existing table
+ // entries. Load instructions will still be removed if aliasing
+ // stores are encountered.
+ bool suspended;
+
+ CseAcc miniAccSetToCseAcc(MiniAccSet miniAccSet, LoadQual loadQual) {
+ NanoAssert(miniAccSet.val < NUM_ACCS || miniAccSet.val == MINI_ACCSET_MULTIPLE.val);
+ return (loadQual == LOAD_CONST) ? CSE_ACC_CONST :
+ (miniAccSet.val == MINI_ACCSET_MULTIPLE.val) ? CSE_ACC_MULTIPLE :
+ miniAccSet.val;
+ }
+
+ static uint32_t hash8(uint32_t hash, const uint8_t data);
+ static uint32_t hash32(uint32_t hash, const uint32_t data);
+ static uint32_t hashptr(uint32_t hash, const void* data);
+ static uint32_t hashfinish(uint32_t hash);
+
+ static uint32_t hashImmI(int32_t);
+ static uint32_t hashImmQorD(uint64_t); // not NANOJIT_64BIT-only -- used by findImmD()
+ static uint32_t hash1(LOpcode op, LIns*);
+ static uint32_t hash2(LOpcode op, LIns*, LIns*);
+ static uint32_t hash3(LOpcode op, LIns*, LIns*, LIns*);
+ static uint32_t hashLoad(LOpcode op, LIns*, int32_t);
+ static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]);
+
+ // These versions are used before an LIns has been created.
+ LIns* findImmISmall(int32_t a, uint32_t &k);
+ LIns* findImmILarge(int32_t a, uint32_t &k);
+#ifdef NANOJIT_64BIT
+ LIns* findImmQ(uint64_t a, uint32_t &k);
+#endif
+ LIns* findImmD(uint64_t d, uint32_t &k);
+ LIns* find1(LOpcode v, LIns* a, uint32_t &k);
+ LIns* find2(LOpcode v, LIns* a, LIns* b, uint32_t &k);
+ LIns* find3(LOpcode v, LIns* a, LIns* b, LIns* c, uint32_t &k);
+ LIns* findLoad(LOpcode v, LIns* a, int32_t b, MiniAccSet miniAccSet, LoadQual loadQual,
+ uint32_t &k);
+ LIns* findCall(const CallInfo *call, uint32_t argc, LIns* args[], uint32_t &k);
+
+        // These versions are used after an LIns has been created; they are
+        // needed for rehashing after a table grows, and simply call through
+        // to the multi-arg versions above.
+ uint32_t findImmISmall(LIns* ins);
+ uint32_t findImmILarge(LIns* ins);
+#ifdef NANOJIT_64BIT
+ uint32_t findImmQ(LIns* ins);
+#endif
+ uint32_t findImmD(LIns* ins);
+ uint32_t find1(LIns* ins);
+ uint32_t find2(LIns* ins);
+ uint32_t find3(LIns* ins);
+ uint32_t findCall(LIns* ins);
+ uint32_t findLoad(LIns* ins);
+
+ // These return false if they failed to grow due to OOM.
+ bool growNL(NLKind kind);
+ bool growL(CseAcc cseAcc);
+
+        // In all of these, 'k' is the index returned by the corresponding
+        // find function above.
+        void addNLImmISmall(LIns* ins, uint32_t k);
+        void addNL(NLKind kind, LIns* ins, uint32_t k);
+        void addL(LIns* ins, uint32_t k);
+
+ void clearAll(); // clears all tables
+ void clearNL(NLKind); // clears one non-load table
+ void clearL(CseAcc); // clears one load table
+
+ public:
+ CseFilter(LirWriter *out, uint8_t embNumUsedAccs, Allocator&);
+
+ // CseFilter does some largish fallible allocations at start-up. If
+ // they fail, the constructor sets this field to 'true'. It should be
+ // checked after creation, and if set the CseFilter cannot be used.
+ // (But the check can be skipped if allocChunk() always succeeds.)
+ //
+ // FIXME: This fallibility is a sop to TraceMonkey's implementation of
+ // infallible malloc -- by avoiding some largish infallible
+ // allocations, it reduces the size of the reserve space needed.
+ // Bug 624590 is open to fix this.
+ bool initOOM;
+
+ LIns* insImmI(int32_t imm);
+#ifdef NANOJIT_64BIT
+ LIns* insImmQ(uint64_t q);
+#endif
+ LIns* insImmD(double d);
+ LIns* ins0(LOpcode v);
+ LIns* ins1(LOpcode v, LIns*);
+ LIns* ins2(LOpcode v, LIns*, LIns*);
+ LIns* ins3(LOpcode v, LIns*, LIns*, LIns*);
+ LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual);
+ LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
+ LIns* insCall(const CallInfo *call, LIns* args[]);
+ LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr);
+ LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr);
+
+        // These functions provide control over CSE in the face of control
+        // flow.  A suspend()/resume() pair may be put around a synthetic
+        // control flow diamond, preventing the inserted label from resetting
+        // the CSE state.  Every suspend() call must be followed by a matching
+        // resume() call, else incorrect code could result (see the usage
+        // sketch after this class).
+ void suspend() { suspended = true; }
+ void resume() { suspended = false; }
+ };
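+
+    // A usage sketch for suspend()/resume() (illustrative only; assumes a
+    // CseFilter* 'cse' at the head of the writer pipeline, and that the
+    // embedder is emitting a synthetic control-flow diamond by hand, using
+    // LIns::setTarget() to patch the branch):
+    //
+    //     cse->suspend();                          // keep CSE state across the diamond
+    //     LIns* br = cse->insBranch(LIR_jf, cond, /*target*/NULL);
+    //     ... emit the fall-through arm ...
+    //     LIns* label = cse->ins0(LIR_label);
+    //     br->setTarget(label);
+    //     cse->resume();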
+
+ class LirBuffer
+ {
+ public:
+ LirBuffer(Allocator& alloc);
+ void clear();
+ uintptr_t makeRoom(size_t szB); // make room for an instruction
+
+ debug_only (void validate() const;)
+ verbose_only(LInsPrinter* printer;)
+
+ int32_t insCount();
+
+ // stats
+ struct
+ {
+ uint32_t lir; // # instructions
+ }
+ _stats;
+
+ AbiKind abi;
+ LIns *state, *param1, *sp, *rp;
+ LIns* savedRegs[NumSavedRegs+1]; // Allocate an extra element in case NumSavedRegs == 0
+
+ /** Each chunk is just a raw area of LIns instances, with no header
+ and no more than 8-byte alignment. The chunk size is somewhat arbitrary. */
+ static const size_t CHUNK_SZB = 8000;
+
+ protected:
+ friend class LirBufWriter;
+
+ /** Get CHUNK_SZB more memory for LIR instructions. */
+ void chunkAlloc();
+ void moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk);
+
+ Allocator& _allocator;
+ uintptr_t _unused; // next unused instruction slot in the current LIR chunk
+ uintptr_t _limit; // one past the last usable byte of the current LIR chunk
+ };
+
+ class LirBufWriter : public LirWriter
+ {
+ LirBuffer* _buf; // underlying buffer housing the instructions
+ const Config& _config;
+
+ public:
+ LirBufWriter(LirBuffer* buf, const Config& config)
+ : LirWriter(0), _buf(buf), _config(config) {
+ }
+
+ // LirWriter interface
+ LIns* insLoad(LOpcode op, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual);
+ LIns* insStore(LOpcode op, LIns* o1, LIns* o2, int32_t disp, AccSet accSet);
+ LIns* ins0(LOpcode op);
+ LIns* ins1(LOpcode op, LIns* o1);
+ LIns* ins2(LOpcode op, LIns* o1, LIns* o2);
+ LIns* ins3(LOpcode op, LIns* o1, LIns* o2, LIns* o3);
+ LIns* insParam(int32_t i, int32_t kind);
+ LIns* insImmI(int32_t imm);
+#ifdef NANOJIT_64BIT
+ LIns* insImmQ(uint64_t imm);
+#endif
+ LIns* insImmD(double d);
+ LIns* insCall(const CallInfo *call, LIns* args[]);
+ LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr);
+ LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr);
+ LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
+ LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
+ LIns* insAlloc(int32_t size);
+ LIns* insJtbl(LIns* index, uint32_t size);
+ LIns* insComment(const char* str);
+ };
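+
+    // A minimal construction-and-emission sketch (illustrative only; assumes
+    // the embedding provides an Allocator 'alloc' and a populated Config
+    // 'config'):
+    //
+    //     LirBuffer* buf = new (alloc) LirBuffer(alloc);
+    //     LirBufWriter writer(buf, config);
+    //     LirWriter* w = &writer;                  // optionally wrap with filters
+    //     w->ins0(LIR_start);
+    //     LIns* two   = w->insImmI(2);
+    //     LIns* three = w->insImmI(3);
+    //     LIns* sum   = w->ins2(LIR_addi, two, three);
+    //     w->ins1(LIR_reti, sum);                  // return the 32-bit sum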
+
+ class LirFilter
+ {
+ public:
+ LirFilter *in;
+ LirFilter(LirFilter *in) : in(in) {}
+ virtual ~LirFilter(){}
+
+        // It's crucial that once this reaches the LIR_start at the beginning
+        // of the buffer, it just keeps returning that LIR_start LIns on any
+        // subsequent calls.
+ virtual LIns* read() {
+ return in->read();
+ }
+ virtual LIns* finalIns() {
+ return in->finalIns();
+ }
+ };
+
+ // concrete
+ class LirReader : public LirFilter
+ {
+ LIns* _ins; // next instruction to be read; invariant: is never a skip
+ LIns* _finalIns; // final instruction in the stream; ie. the first one to be read
+
+ public:
+ LirReader(LIns* ins) : LirFilter(0), _ins(ins), _finalIns(ins)
+ {
+ // The last instruction for a fragment shouldn't be a skip.
+ // (Actually, if the last *inserted* instruction exactly fills up
+ // a chunk, a new chunk will be created, and thus the last *written*
+ // instruction will be a skip -- the one needed for the
+ // cross-chunk link. But the last *inserted* instruction is what
+ // is recorded and used to initialise each LirReader, and that is
+ // what is seen here, and therefore this assertion holds.)
+ NanoAssert(ins && !ins->isop(LIR_skip));
+ }
+ virtual ~LirReader() {}
+
+ // Returns next instruction and advances to the prior instruction.
+ // Invariant: never returns a skip.
+ LIns* read();
+
+ LIns* finalIns() {
+ return _finalIns;
+ }
+ };
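+
+    // A backward-iteration sketch (illustrative only; 'lastIns' stands for
+    // whatever final instruction the embedding recorded for the fragment):
+    //
+    //     LirReader reader(lastIns);
+    //     for (LIns* ins = reader.read(); !ins->isop(LIR_start); ins = reader.read()) {
+    //         // 'ins' visits the fragment in reverse order; skips are never returned.
+    //     }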
+
+ verbose_only(void live(LirFilter* in, Allocator& alloc, Fragment* frag, LogControl*);)
+
+ // WARNING: StackFilter assumes that all stack entries are eight bytes.
+ // Some of its optimisations aren't valid if that isn't true. See
+ // StackFilter::read() for more details.
+ class StackFilter: public LirFilter
+ {
+ LIns* sp;
+ BitSet stk;
+ int top;
+ int getTop(LIns* br);
+
+ public:
+ StackFilter(LirFilter *in, Allocator& alloc, LIns* sp);
+ LIns* read();
+ };
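+
+    // Typical placement (illustrative only; 'reader' is the LirFilter* being
+    // read from and 'lirbuf' is the fragment's LirBuffer):
+    //
+    //     StackFilter stackfilter(reader, alloc, lirbuf->sp);
+    //     // read() now elides stores to stack slots that the filter can
+    //     // prove are dead, on the assumption that every entry is 8 bytes.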
+
+ // This type is used to perform a simple interval analysis of 32-bit
+ // add/sub/mul. It lets us avoid overflow checks in some cases.
+ struct Interval
+ {
+ // The bounds are 64-bit integers so that any overflow from a 32-bit
+ // operation can be safely detected.
+ //
+ // If 'hasOverflowed' is false, 'lo' and 'hi' must be in the range
+ // I32_MIN..I32_MAX. If 'hasOverflowed' is true, 'lo' and 'hi' should
+ // not be trusted (and in debug builds we set them both to a special
+ // value UNTRUSTWORTHY that is outside the I32_MIN..I32_MAX range to
+ // facilitate sanity checking).
+ //
+ int64_t lo;
+ int64_t hi;
+ bool hasOverflowed;
+
+ static const int64_t I32_MIN = int64_t(int32_t(0x80000000));
+ static const int64_t I32_MAX = int64_t(int32_t(0x7fffffff));
+
+#ifdef DEBUG
+ static const int64_t UNTRUSTWORTHY = int64_t(0xdeafdeadbeeffeedLL);
+
+ bool isSane() {
+ return (hasOverflowed && lo == UNTRUSTWORTHY && hi == UNTRUSTWORTHY) ||
+ (!hasOverflowed && lo <= hi && I32_MIN <= lo && hi <= I32_MAX);
+ }
+#endif
+
+ Interval(int64_t lo_, int64_t hi_) {
+ if (lo_ < I32_MIN || I32_MAX < hi_) {
+ hasOverflowed = true;
+#ifdef DEBUG
+ lo = UNTRUSTWORTHY;
+ hi = UNTRUSTWORTHY;
+#endif
+ } else {
+ hasOverflowed = false;
+ lo = lo_;
+ hi = hi_;
+ }
+ NanoAssert(isSane());
+ }
+
+ static Interval OverflowInterval() {
+ Interval interval(0, 0);
+#ifdef DEBUG
+ interval.lo = UNTRUSTWORTHY;
+ interval.hi = UNTRUSTWORTHY;
+#endif
+ interval.hasOverflowed = true;
+ return interval;
+ }
+
+ static Interval of(LIns* ins, int32_t lim);
+
+ static Interval add(Interval x, Interval y);
+ static Interval sub(Interval x, Interval y);
+ static Interval mul(Interval x, Interval y);
+
+ bool canBeZero() {
+ NanoAssert(isSane());
+ return hasOverflowed || (lo <= 0 && 0 <= hi);
+ }
+
+ bool canBeNegative() {
+ NanoAssert(isSane());
+ return hasOverflowed || (lo < 0);
+ }
+ };
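+
+    // A worked example (illustrative): if 'a' and 'b' are each known -- say
+    // via Interval::of() -- to lie in 0..1000, then
+    //
+    //     Interval ia(0, 1000), ib(0, 1000);
+    //     Interval prod = Interval::mul(ia, ib);   // bounds 0..1,000,000
+    //
+    // stays inside I32_MIN..I32_MAX, so prod.hasOverflowed is false and an
+    // overflow-checking multiply of a*b (e.g. LIR_mulxovi) can safely be
+    // replaced by a plain LIR_muli.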
+
+#if NJ_SOFTFLOAT_SUPPORTED
+ struct SoftFloatOps
+ {
+ const CallInfo* opmap[LIR_sentinel];
+ SoftFloatOps();
+ };
+
+ extern const SoftFloatOps softFloatOps;
+
+ // Replaces fpu ops with function calls, for platforms lacking float
+ // hardware (eg. some ARM machines).
+ class SoftFloatFilter: public LirWriter
+ {
+ public:
+ static const CallInfo* opmap[LIR_sentinel];
+
+ SoftFloatFilter(LirWriter *out);
+ LIns *split(LIns *a);
+ LIns *split(const CallInfo *call, LIns* args[]);
+ LIns *callD1(const CallInfo *call, LIns *a);
+ LIns *callD2(const CallInfo *call, LIns *a, LIns *b);
+ LIns *callI1(const CallInfo *call, LIns *a);
+ LIns *cmpD(const CallInfo *call, LIns *a, LIns *b);
+ LIns *ins1(LOpcode op, LIns *a);
+ LIns *ins2(LOpcode op, LIns *a, LIns *b);
+ LIns *insCall(const CallInfo *ci, LIns* args[]);
+ };
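+
+    // Typical placement (illustrative only): on a soft-float target the
+    // embedder inserts this filter just above the LirBufWriter, e.g.
+    //
+    //     SoftFloatFilter softfloat(&writer);
+    //     LirWriter* w = &softfloat;   // later filters sit on top of this
+    //
+    // so that double-typed arithmetic and comparisons reaching it are
+    // rewritten as calls into the soft-float runtime described by 'opmap'.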
+#endif
+
+#ifdef DEBUG
+ // This class does thorough checking of LIR. It checks *implicit* LIR
+ // instructions, ie. LIR instructions specified via arguments -- to
+ // methods like insLoad() -- that have not yet been converted into
+ // *explicit* LIns objects in a LirBuffer. The reason for this is that if
+ // we wait until the LIR instructions are explicit, they will have gone
+ // through the entire writer pipeline and been optimised. By checking
+ // implicit LIR instructions we can check the LIR code at the start of the
+ // writer pipeline, exactly as it is generated by the compiler front-end.
+ //
+ // A general note about the errors produced by this class: for
+ // TraceMonkey, they won't include special names for instructions that
+ // have them unless TMFLAGS is specified.
+ class ValidateWriter : public LirWriter
+ {
+ private:
+ LInsPrinter* printer;
+ const char* whereInPipeline;
+
+ const char* type2string(LTy type);
+ void typeCheckArgs(LOpcode op, int nArgs, LTy formals[], LIns* args[]);
+ void errorStructureShouldBe(LOpcode op, const char* argDesc, int argN, LIns* arg,
+ const char* shouldBeDesc);
+ void errorAccSet(const char* what, AccSet accSet, const char* shouldDesc);
+ void errorLoadQual(const char* what, LoadQual loadQual);
+ void checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2);
+ void checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins);
+ void checkLInsIsNull(LOpcode op, int argN, LIns* ins);
+ void checkAccSet(LOpcode op, LIns* base, int32_t disp, AccSet accSet); // defined by the embedder
+
+ // These can be set by the embedder and used in checkAccSet().
+ void** checkAccSetExtras;
+
+ public:
+ ValidateWriter(LirWriter* out, LInsPrinter* printer, const char* where);
+ void setCheckAccSetExtras(void** extras) { checkAccSetExtras = extras; }
+
+ LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual);
+ LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
+ LIns* ins0(LOpcode v);
+ LIns* ins1(LOpcode v, LIns* a);
+ LIns* ins2(LOpcode v, LIns* a, LIns* b);
+ LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
+ LIns* insParam(int32_t arg, int32_t kind);
+ LIns* insImmI(int32_t imm);
+#ifdef NANOJIT_64BIT
+ LIns* insImmQ(uint64_t imm);
+#endif
+ LIns* insImmD(double d);
+ LIns* insCall(const CallInfo *call, LIns* args[]);
+ LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr);
+ LIns* insGuardXov(LOpcode v, LIns* a, LIns* b, GuardRecord* gr);
+ LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
+ LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
+ LIns* insAlloc(int32_t size);
+ LIns* insJtbl(LIns* index, uint32_t size);
+ };
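+
+    // A placement sketch (illustrative only; 'printer' is a LInsPrinter* and
+    // 'w' the rest of the writer pipeline): validation is normally done at
+    // the very front of the pipeline, so instructions are checked exactly as
+    // the front-end generates them:
+    //
+    //     ValidateWriter validator(w, printer, "start of writer pipeline");
+    //     w = &validator;   // the front-end now writes through the validator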
+
+ // This just checks things that aren't possible to check in
+ // ValidateWriter, eg. whether all branch targets are set and are labels.
+ class ValidateReader: public LirFilter {
+ public:
+ ValidateReader(LirFilter* in);
+ LIns* read();
+ };
+#endif
+
+#ifdef NJ_VERBOSE
+    /* A listing filter for LIR that reads the instruction stream backwards.
+       It merely passes its input through to its output, but notes everything
+       down as it goes.  When finish() is called, it prints out what went
+       through.  It is intended for printing arbitrary intermediate
+       transformation stages of the LIR. */
+ class ReverseLister : public LirFilter
+ {
+ Allocator& _alloc;
+ LInsPrinter* _printer;
+ const char* _title;
+ StringList _strs;
+ LogControl* _logc;
+ LIns* _prevIns;
+ public:
+ ReverseLister(LirFilter* in, Allocator& alloc,
+ LInsPrinter* printer, LogControl* logc, const char* title)
+ : LirFilter(in)
+ , _alloc(alloc)
+ , _printer(printer)
+ , _title(title)
+ , _strs(alloc)
+ , _logc(logc)
+ , _prevIns(NULL)
+ { }
+
+ void finish();
+ LIns* read();
+ };
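+
+    // Sketch (illustrative only): to dump the LIR as it appears between two
+    // stages of the reading pipeline, interpose a ReverseLister and call
+    // finish() once the whole fragment has been read, e.g.
+    //
+    //     ReverseLister lister(reader, alloc, printer, logc, "Before StackFilter");
+    //     StackFilter stackfilter(&lister, alloc, lirbuf->sp);
+    //     ... read/assemble from 'stackfilter' ...
+    //     lister.finish();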
+#endif
+
+}
+#endif // __nanojit_LIR__