summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/ghc.cabal.in4
-rw-r--r--compiler/nativeGen/Alpha/Instr.hs140
-rw-r--r--compiler/nativeGen/MachInstrs.hs706
-rw-r--r--compiler/nativeGen/PPC/Instr.hs166
-rw-r--r--compiler/nativeGen/SPARC/Instr.hs188
-rw-r--r--compiler/nativeGen/X86/Instr.hs335
6 files changed, 867 insertions, 672 deletions
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 47b321aaf7..8fd470ca3c 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -456,6 +456,10 @@ Library
AsmCodeGen
MachCodeGen
MachInstrs
+ Alpha.Instr
+ X86.Instr
+ PPC.Instr
+ SPARC.Instr
MachRegs
NCGMonad
PositionIndependentCode
diff --git a/compiler/nativeGen/Alpha/Instr.hs b/compiler/nativeGen/Alpha/Instr.hs
new file mode 100644
index 0000000000..16ec53f299
--- /dev/null
+++ b/compiler/nativeGen/Alpha/Instr.hs
@@ -0,0 +1,140 @@
+-----------------------------------------------------------------------------
+--
+-- Machine-dependent assembly language
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-----------------------------------------------------------------------------
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+
+module Alpha.Instr (
+ Cond(..),
+ Instr(..),
+ RI(..)
+)
+
+where
+
+import BlockId
+import MachRegs
+import Cmm
+import FastString
+import CLabel
+
+data Cond
+ = ALWAYS -- For BI (same as BR)
+ | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
+ | GE -- For BI only
+ | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
+ | LE -- For CMP and BI
+ | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
+ | NE -- For BI only
+ | NEVER -- For BI (null instruction)
+ | ULE -- For CMP only
+ | ULT -- For CMP only
+ deriving Eq
+
+
+-- -----------------------------------------------------------------------------
+-- Machine's assembly language
+
+-- We have a few common "instructions" (nearly all the pseudo-ops) but
+-- mostly all of 'Instr' is machine-specific.
+
+-- Register or immediate
+data RI
+ = RIReg Reg
+ | RIImm Imm
+
+data Instr
+ -- comment pseudo-op
+ = COMMENT FastString
+
+ -- some static data spat out during code
+ -- generation. Will be extracted before
+ -- pretty-printing.
+ | LDATA Section [CmmStatic]
+
+ -- start a new basic block. Useful during
+ -- codegen, removed later. Preceding
+ -- instruction should be a jump, as per the
+ -- invariants for a BasicBlock (see Cmm).
+ | NEWBLOCK BlockId
+
+ -- specify current stack offset for
+ -- benefit of subsequent passes
+ | DELTA Int
+
+ -- | spill this reg to a stack slot
+ | SPILL Reg Int
+
+ -- | reload this reg from a stack slot
+ | RELOAD Int Reg
+
+ -- Loads and stores.
+ | LD Size Reg AddrMode -- size, dst, src
+ | LDA Reg AddrMode -- dst, src
+ | LDAH Reg AddrMode -- dst, src
+ | LDGP Reg AddrMode -- dst, src
+ | LDI Size Reg Imm -- size, dst, src
+ | ST Size Reg AddrMode -- size, src, dst
+
+ -- Int Arithmetic.
+ | CLR Reg -- dst
+ | ABS Size RI Reg -- size, src, dst
+ | NEG Size Bool RI Reg -- size, overflow, src, dst
+ | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
+ | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
+ | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
+ | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
+ | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
+ | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
+ | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
+
+ -- Simple bit-twiddling.
+ | NOT RI Reg
+ | AND Reg RI Reg
+ | ANDNOT Reg RI Reg
+ | OR Reg RI Reg
+ | ORNOT Reg RI Reg
+ | XOR Reg RI Reg
+ | XORNOT Reg RI Reg
+ | SLL Reg RI Reg
+ | SRL Reg RI Reg
+ | SRA Reg RI Reg
+
+ | ZAP Reg RI Reg
+ | ZAPNOT Reg RI Reg
+
+ | NOP
+
+ -- Comparison
+ | CMP Cond Reg RI Reg
+
+ -- Float Arithmetic.
+ | FCLR Reg
+ | FABS Reg Reg
+ | FNEG Size Reg Reg
+ | FADD Size Reg Reg Reg
+ | FDIV Size Reg Reg Reg
+ | FMUL Size Reg Reg Reg
+ | FSUB Size Reg Reg Reg
+ | CVTxy Size Size Reg Reg
+ | FCMP Size Cond Reg Reg Reg
+ | FMOV Reg Reg
+
+ -- Jumping around.
+ | BI Cond Reg Imm
+ | BF Cond Reg Imm
+ | BR Imm
+ | JMP Reg AddrMode Int
+ | BSR Imm Int
+ | JSR Reg AddrMode Int
+
+ -- Alpha-specific pseudo-ops.
+ | FUNBEGIN CLabel
+ | FUNEND CLabel
+
+
diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs
index 529da0dd83..a3e2d2b901 100644
--- a/compiler/nativeGen/MachInstrs.hs
+++ b/compiler/nativeGen/MachInstrs.hs
@@ -15,28 +15,29 @@
#include "nativeGen/NCG.h"
-module MachInstrs (
- -- * Cmm instantiations
- NatCmm, NatCmmTop, NatBasicBlock,
- -- * Machine instructions
- Instr(..),
- Cond(..), condUnsigned, condToSigned, condToUnsigned,
-#if powerpc_TARGET_ARCH
- condNegate,
+module MachInstrs (
+ NatCmm,
+ NatCmmTop,
+ NatBasicBlock,
+ condUnsigned,
+ condToSigned,
+ condToUnsigned,
+
+#if alpha_TARGET_ARCH
+ module Alpha.Instr
+#elif powerpc_TARGET_ARCH
+ module PPC.Instr
+#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
+ module X86.Instr
+#elif sparc_TARGET_ARCH
+ module SPARC.Instr
+#else
+#error "MachInstrs: not defined for this architecture"
#endif
- RI(..),
+)
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
- Operand(..),
-#endif
-#if i386_TARGET_ARCH
- i386_insert_ffrees,
-#endif
-#if sparc_TARGET_ARCH
- riZero, fpRelEA, moveSp, fPair,
-#endif
- ) where
+where
#include "HsVersions.h"
@@ -51,83 +52,28 @@ import Constants ( wORD_SIZE )
import GHC.Exts
+#if alpha_TARGET_ARCH
+import Alpha.Instr
+#elif powerpc_TARGET_ARCH
+import PPC.Instr
+#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import X86.Instr
+#elif sparc_TARGET_ARCH
+import SPARC.Instr
+#else
+#error "MachInstrs: not defined for this architecture"
+#endif
--- -----------------------------------------------------------------------------
--- Our flavours of the Cmm types
+-- Our flavours of the Cmm types
-- Type synonyms for Cmm populated with native code
+
type NatCmm = GenCmm CmmStatic [CmmStatic] (ListGraph Instr)
type NatCmmTop = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr)
type NatBasicBlock = GenBasicBlock Instr
--- -----------------------------------------------------------------------------
--- Conditions on this architecture
-
-data Cond
-#if alpha_TARGET_ARCH
- = ALWAYS -- For BI (same as BR)
- | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
- | GE -- For BI only
- | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name)
- | LE -- For CMP and BI
- | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
- | NE -- For BI only
- | NEVER -- For BI (null instruction)
- | ULE -- For CMP only
- | ULT -- For CMP only
-#endif
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
- = ALWAYS -- What's really used? ToDo
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | POS
- | CARRY
- | OFLO
- | PARITY
- | NOTPARITY
-#endif
-#if sparc_TARGET_ARCH
- = ALWAYS -- What's really used? ToDo
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | NEVER
- | POS
- | VC
- | VS
-#endif
-#if powerpc_TARGET_ARCH
- = ALWAYS
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
-#endif
- deriving Eq -- to make an assertion work
+-- Condition utils
condUnsigned GU = True
condUnsigned LU = True
condUnsigned GEU = True
@@ -146,590 +92,6 @@ condToUnsigned GE = GEU
condToUnsigned LE = LEU
condToUnsigned x = x
-#if powerpc_TARGET_ARCH
-condNegate ALWAYS = panic "condNegate: ALWAYS"
-condNegate EQQ = NE
-condNegate GE = LTT
-condNegate GEU = LU
-condNegate GTT = LE
-condNegate GU = LEU
-condNegate LE = GTT
-condNegate LEU = GU
-condNegate LTT = GE
-condNegate LU = GEU
-condNegate NE = EQQ
-#endif
-
--- -----------------------------------------------------------------------------
--- Register or immediate (a handy type on some platforms)
-
-data RI = RIReg Reg
- | RIImm Imm
-
-
--- -----------------------------------------------------------------------------
--- Machine's assembly language
-
--- We have a few common "instructions" (nearly all the pseudo-ops) but
--- mostly all of 'Instr' is machine-specific.
-
-data Instr
- = COMMENT FastString -- comment pseudo-op
-
- | LDATA Section [CmmStatic] -- some static data spat out during code
- -- generation. Will be extracted before
- -- pretty-printing.
-
- | NEWBLOCK BlockId -- start a new basic block. Useful during
- -- codegen, removed later. Preceding
- -- instruction should be a jump, as per the
- -- invariants for a BasicBlock (see Cmm).
-
- | DELTA Int -- specify current stack offset for
- -- benefit of subsequent passes
-
- | SPILL Reg Int -- ^ spill this reg to a stack slot
- | RELOAD Int Reg -- ^ reload this reg from a stack slot
-
--- -----------------------------------------------------------------------------
--- Alpha instructions
-
-#if alpha_TARGET_ARCH
-
--- data Instr continues...
-
--- Loads and stores.
- | LD Size Reg AddrMode -- size, dst, src
- | LDA Reg AddrMode -- dst, src
- | LDAH Reg AddrMode -- dst, src
- | LDGP Reg AddrMode -- dst, src
- | LDI Size Reg Imm -- size, dst, src
- | ST Size Reg AddrMode -- size, src, dst
-
--- Int Arithmetic.
- | CLR Reg -- dst
- | ABS Size RI Reg -- size, src, dst
- | NEG Size Bool RI Reg -- size, overflow, src, dst
- | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst
- | SADD Size Size Reg RI Reg -- size, scale, src, src, dst
- | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst
- | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst
- | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst
- | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst
- | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst
-
--- Simple bit-twiddling.
- | NOT RI Reg
- | AND Reg RI Reg
- | ANDNOT Reg RI Reg
- | OR Reg RI Reg
- | ORNOT Reg RI Reg
- | XOR Reg RI Reg
- | XORNOT Reg RI Reg
- | SLL Reg RI Reg
- | SRL Reg RI Reg
- | SRA Reg RI Reg
-
- | ZAP Reg RI Reg
- | ZAPNOT Reg RI Reg
-
- | NOP
-
--- Comparison
- | CMP Cond Reg RI Reg
-
--- Float Arithmetic.
- | FCLR Reg
- | FABS Reg Reg
- | FNEG Size Reg Reg
- | FADD Size Reg Reg Reg
- | FDIV Size Reg Reg Reg
- | FMUL Size Reg Reg Reg
- | FSUB Size Reg Reg Reg
- | CVTxy Size Size Reg Reg
- | FCMP Size Cond Reg Reg Reg
- | FMOV Reg Reg
-
--- Jumping around.
- | BI Cond Reg Imm
- | BF Cond Reg Imm
- | BR Imm
- | JMP Reg AddrMode Int
- | BSR Imm Int
- | JSR Reg AddrMode Int
-
--- Alpha-specific pseudo-ops.
- | FUNBEGIN CLabel
- | FUNEND CLabel
-
-data RI
- = RIReg Reg
- | RIImm Imm
-
-#endif /* alpha_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Intel x86 instructions
-
-{-
-Intel, in their infinite wisdom, selected a stack model for floating
-point registers on x86. That might have made sense back in 1979 --
-nowadays we can see it for the nonsense it really is. A stack model
-fits poorly with the existing nativeGen infrastructure, which assumes
-flat integer and FP register sets. Prior to this commit, nativeGen
-could not generate correct x86 FP code -- to do so would have meant
-somehow working the register-stack paradigm into the register
-allocator and spiller, which sounds very difficult.
-
-We have decided to cheat, and go for a simple fix which requires no
-infrastructure modifications, at the expense of generating ropey but
-correct FP code. All notions of the x86 FP stack and its insns have
-been removed. Instead, we pretend (to the instruction selector and
-register allocator) that x86 has six floating point registers, %fake0
-.. %fake5, which can be used in the usual flat manner. We further
-claim that x86 has floating point instructions very similar to SPARC
-and Alpha, that is, a simple 3-operand register-register arrangement.
-Code generation and register allocation proceed on this basis.
-
-When we come to print out the final assembly, our convenient fiction
-is converted to dismal reality. Each fake instruction is
-independently converted to a series of real x86 instructions.
-%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
-arithmetic operations, the two operands are pushed onto the top of the
-FP stack, the operation done, and the result copied back into the
-relevant register. There are only six %fake registers because 2 are
-needed for the translation, and x86 has 8 in total.
-
-The translation is inefficient but is simple and it works. A cleverer
-translation would handle a sequence of insns, simulating the FP stack
-contents, would not impose a fixed mapping from %fake to %st regs, and
-hopefully could avoid most of the redundant reg-reg moves of the
-current translation.
-
-We might as well make use of whatever unique FP facilities Intel have
-chosen to bless us with (let's not be churlish, after all).
-Hence GLDZ and GLD1. Bwahahahahahahaha!
--}
-
-{-
-MORE FLOATING POINT MUSINGS...
-
-Intel's internal floating point registers are by default 80 bit
-extended precision. This means that all operations done on values in
-registers are done at 80 bits, and unless the intermediate values are
-truncated to the appropriate size (32 or 64 bits) by storing in
-memory, calculations in registers will give different results from
-calculations which pass intermediate values in memory (eg. via
-function calls).
-
-One solution is to set the FPU into 64 bit precision mode. Some OSs
-do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
-that this will only affect 64-bit precision arithmetic; 32-bit
-calculations will still be done at 64-bit precision in registers. So
-it doesn't solve the whole problem.
-
-There's also the issue of what the C library is expecting in terms of
-precision. It seems to be the case that glibc on Linux expects the
-FPU to be set to 80 bit precision, so setting it to 64 bit could have
-unexpected effects. Changing the default could have undesirable
-effects on other 3rd-party library code too, so the right thing would
-be to save/restore the FPU control word across Haskell code if we were
-to do this.
-
-gcc's -ffloat-store gives consistent results by always storing the
-results of floating-point calculations in memory, which works for both
-32 and 64-bit precision. However, it only affects the values of
-user-declared floating point variables in C, not intermediate results.
-GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
-flag).
-
-Another problem is how to spill floating point registers in the
-register allocator. Should we spill the whole 80 bits, or just 64?
-On an OS which is set to 64 bit precision, spilling 64 is fine. On
-Linux, spilling 64 bits will round the results of some operations.
-This is what gcc does. Spilling at 80 bits requires taking up a full
-128 bit slot (so we get alignment). We spill at 80-bits and ignore
-the alignment problems.
-
-In the future, we'll use the SSE registers for floating point. This
-requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
-precision float ops), which means P4 or Xeon and above. Using SSE
-will solve all these problems, because the SSE registers use fixed 32
-bit or 64 bit precision.
-
---SDM 1/2003
--}
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- data Instr continues...
-
--- Moves.
- | MOV Size Operand Operand
- | MOVZxL Size Operand Operand -- size is the size of operand 1
- | MOVSxL Size Operand Operand -- size is the size of operand 1
- -- x86_64 note: plain mov into a 32-bit register always zero-extends
- -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
- -- don't affect the high bits of the register.
-
--- Load effective address (also a very useful three-operand add instruction :-)
- | LEA Size Operand Operand
-
--- Int Arithmetic.
- | ADD Size Operand Operand
- | ADC Size Operand Operand
- | SUB Size Operand Operand
-
- | MUL Size Operand Operand
- | IMUL Size Operand Operand -- signed int mul
- | IMUL2 Size Operand -- %edx:%eax = operand * %eax
-
- | DIV Size Operand -- eax := eax:edx/op, edx := eax:edx%op
- | IDIV Size Operand -- ditto, but signed
-
--- Simple bit-twiddling.
- | AND Size Operand Operand
- | OR Size Operand Operand
- | XOR Size Operand Operand
- | NOT Size Operand
- | NEGI Size Operand -- NEG instruction (name clash with Cond)
-
--- Shifts (amount may be immediate or %cl only)
- | SHL Size Operand{-amount-} Operand
- | SAR Size Operand{-amount-} Operand
- | SHR Size Operand{-amount-} Operand
-
- | BT Size Imm Operand
- | NOP
-
-#if i386_TARGET_ARCH
--- Float Arithmetic.
-
--- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
--- as single instructions right up until we spit them out.
- -- all the 3-operand fake fp insns are src1 src2 dst
- -- and furthermore are constrained to be fp regs only.
- -- IMPORTANT: keep is_G_insn up to date with any changes here
- | GMOV Reg Reg -- src(fpreg), dst(fpreg)
- | GLD Size AddrMode Reg -- src, dst(fpreg)
- | GST Size Reg AddrMode -- src(fpreg), dst
-
- | GLDZ Reg -- dst(fpreg)
- | GLD1 Reg -- dst(fpreg)
-
- | GFTOI Reg Reg -- src(fpreg), dst(intreg)
- | GDTOI Reg Reg -- src(fpreg), dst(intreg)
-
- | GITOF Reg Reg -- src(intreg), dst(fpreg)
- | GITOD Reg Reg -- src(intreg), dst(fpreg)
-
- | GADD Size Reg Reg Reg -- src1, src2, dst
- | GDIV Size Reg Reg Reg -- src1, src2, dst
- | GSUB Size Reg Reg Reg -- src1, src2, dst
- | GMUL Size Reg Reg Reg -- src1, src2, dst
-
- -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
- -- Compare src1 with src2; set the Zero flag iff the numbers are
- -- comparable and the comparison is True. Subsequent code must
- -- test the %eflags zero flag regardless of the supplied Cond.
- | GCMP Cond Reg Reg -- src1, src2
-
- | GABS Size Reg Reg -- src, dst
- | GNEG Size Reg Reg -- src, dst
- | GSQRT Size Reg Reg -- src, dst
- | GSIN Size CLabel CLabel Reg Reg -- src, dst
- | GCOS Size CLabel CLabel Reg Reg -- src, dst
- | GTAN Size CLabel CLabel Reg Reg -- src, dst
-
- | GFREE -- do ffree on all x86 regs; an ugly hack
-#endif
-
-#if x86_64_TARGET_ARCH
--- SSE2 floating point: we use a restricted set of the available SSE2
--- instructions for floating-point.
-
- -- use MOV for moving (either movss or movsd (movlpd better?))
-
- | CVTSS2SD Reg Reg -- F32 to F64
- | CVTSD2SS Reg Reg -- F64 to F32
- | CVTTSS2SIQ Operand Reg -- F32 to I32/I64 (with truncation)
- | CVTTSD2SIQ Operand Reg -- F64 to I32/I64 (with truncation)
- | CVTSI2SS Operand Reg -- I32/I64 to F32
- | CVTSI2SD Operand Reg -- I32/I64 to F64
-
- -- use ADD & SUB for arithmetic. In both cases, operands
- -- are Operand Reg.
-
- -- SSE2 floating-point division:
- | FDIV Size Operand Operand -- divisor, dividend(dst)
-
- -- use CMP for comparisons. ucomiss and ucomisd instructions
- -- compare single/double prec floating point respectively.
-
- | SQRT Size Operand Reg -- src, dst
-#endif
-
--- Comparison
- | TEST Size Operand Operand
- | CMP Size Operand Operand
- | SETCC Cond Operand
-
--- Stack Operations.
- | PUSH Size Operand
- | POP Size Operand
- -- both unused (SDM):
- -- | PUSHA
- -- | POPA
-
--- Jumping around.
- | JMP Operand
- | JXX Cond BlockId -- includes unconditional branches
- | JXX_GBL Cond Imm -- non-local version of JXX
- | JMP_TBL Operand [BlockId] -- table jump
- | CALL (Either Imm Reg) [Reg]
-
--- Other things.
- | CLTD Size -- sign extend %eax into %edx:%eax
-
- | FETCHGOT Reg -- pseudo-insn for ELF position-independent code
- -- pretty-prints as
- -- call 1f
- -- 1: popl %reg
- -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
- | FETCHPC Reg -- pseudo-insn for Darwin position-independent code
- -- pretty-prints as
- -- call 1f
- -- 1: popl %reg
-
-
-data Operand
- = OpReg Reg -- register
- | OpImm Imm -- immediate value
- | OpAddr AddrMode -- memory reference
-
-#endif /* i386 or x86_64 */
-
-#if i386_TARGET_ARCH
-i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr]
-i386_insert_ffrees blocks
- | or (map (any is_G_instr) [ instrs | BasicBlock id instrs <- blocks ])
- = map ffree_before_nonlocal_transfers blocks
- | otherwise
- = blocks
- where
- ffree_before_nonlocal_transfers (BasicBlock id insns)
- = BasicBlock id (foldr p [] insns)
- where p insn r = case insn of
- CALL _ _ -> GFREE : insn : r
- JMP _ -> GFREE : insn : r
- other -> insn : r
-
--- if you ever add a new FP insn to the fake x86 FP insn set,
--- you must update this too
-is_G_instr :: Instr -> Bool
-is_G_instr instr
- = case instr of
- GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
- GLDZ _ -> True; GLD1 _ -> True
- GFTOI _ _ -> True; GDTOI _ _ -> True
- GITOF _ _ -> True; GITOD _ _ -> True
- GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
- GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
- GCMP _ _ _ -> True; GABS _ _ _ -> True
- GNEG _ _ _ -> True; GSQRT _ _ _ -> True
- GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True
- GFREE -> panic "is_G_instr: GFREE (!)"
- other -> False
-#endif /* i386_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Sparc instructions
-
-#if sparc_TARGET_ARCH
-
--- data Instr continues...
-
--- Loads and stores.
- | LD Size AddrMode Reg -- size, src, dst
- | ST Size Reg AddrMode -- size, src, dst
-
--- Int Arithmetic.
- | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
- | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
-
- | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst
- | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst
-
-
- -- The SPARC divide instructions perform 64bit by 32bit division
- -- The Y register is xored into the first operand.
-
- -- On _some implementations_ the Y register is overwritten by
- -- the remainder, so we have to make sure it is 0 each time.
-
- -- dst <- ((Y `shiftL` 32) `or` src1) `div` src2
- | UDIV Bool Reg RI Reg -- cc?, src1, src2, dst
- | SDIV Bool Reg RI Reg -- cc?, src1, src2, dst
-
- | RDY Reg -- move contents of Y register to reg
- | WRY Reg Reg -- Y <- src1 `xor` src2
-
--- Simple bit-twiddling.
- | AND Bool Reg RI Reg -- cc?, src1, src2, dst
- | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
- | OR Bool Reg RI Reg -- cc?, src1, src2, dst
- | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
- | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
- | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
- | SLL Reg RI Reg -- src1, src2, dst
- | SRL Reg RI Reg -- src1, src2, dst
- | SRA Reg RI Reg -- src1, src2, dst
- | SETHI Imm Reg -- src, dst
- | NOP -- Really SETHI 0, %g0, but worth an alias
-
--- Float Arithmetic.
-
--- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
--- instructions right up until we spit them out.
- | FABS Size Reg Reg -- src dst
- | FADD Size Reg Reg Reg -- src1, src2, dst
- | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst
- | FDIV Size Reg Reg Reg -- src1, src2, dst
- | FMOV Size Reg Reg -- src, dst
- | FMUL Size Reg Reg Reg -- src1, src2, dst
- | FNEG Size Reg Reg -- src, dst
- | FSQRT Size Reg Reg -- src, dst
- | FSUB Size Reg Reg Reg -- src1, src2, dst
- | FxTOy Size Size Reg Reg -- src, dst
-
--- Jumping around.
- | BI Cond Bool BlockId -- cond, annul?, target
- | BF Cond Bool BlockId -- cond, annul?, target
-
- | JMP AddrMode -- target
-
- -- With a tabled jump we know all the possible destinations. Tabled
- -- jump includes its list of destinations so we can work out what regs
- -- are live across the jump.
- --
- | JMP_TBL AddrMode [BlockId]
-
- | CALL (Either Imm Reg) Int Bool -- target, args, terminal
-
-riZero :: RI -> Bool
-
-riZero (RIImm (ImmInt 0)) = True
-riZero (RIImm (ImmInteger 0)) = True
-riZero (RIReg (RealReg 0)) = True
-riZero _ = False
-
--- Calculate the effective address which would be used by the
--- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
--- alas -- can't have fpRelEA here because of module dependencies.
-fpRelEA :: Int -> Reg -> Instr
-fpRelEA n dst
- = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
-
--- Code to shift the stack pointer by n words.
-moveSp :: Int -> Instr
-moveSp n
- = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
-
--- Produce the second-half-of-a-double register given the first half.
-fPair :: Reg -> Maybe Reg
-fPair (RealReg n)
- | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1))
-
-fPair (VirtualRegD u)
- = Just (VirtualRegHi u)
-
-fPair other
- = trace ("MachInstrs.fPair: can't get high half of supposed double reg " ++ show other)
- Nothing
-
-#endif /* sparc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- PowerPC instructions
-#ifdef powerpc_TARGET_ARCH
--- data Instr continues...
--- Loads and stores.
- | LD Size Reg AddrMode -- Load size, dst, src
- | LA Size Reg AddrMode -- Load arithmetic size, dst, src
- | ST Size Reg AddrMode -- Store size, src, dst
- | STU Size Reg AddrMode -- Store with Update size, src, dst
- | LIS Reg Imm -- Load Immediate Shifted dst, src
- | LI Reg Imm -- Load Immediate dst, src
- | MR Reg Reg -- Move Register dst, src -- also for fmr
-
- | CMP Size Reg RI --- size, src1, src2
- | CMPL Size Reg RI --- size, src1, src2
-
- | BCC Cond BlockId
- | BCCFAR Cond BlockId
- | JMP CLabel -- same as branch,
- -- but with CLabel instead of block ID
- | MTCTR Reg
- | BCTR [BlockId] -- with list of local destinations
- | BL CLabel [Reg] -- with list of argument regs
- | BCTRL [Reg]
-
- | ADD Reg Reg RI -- dst, src1, src2
- | ADDC Reg Reg Reg -- (carrying) dst, src1, src2
- | ADDE Reg Reg Reg -- (extend) dst, src1, src2
- | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
- | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
- | MULLW Reg Reg RI
- | DIVW Reg Reg Reg
- | DIVWU Reg Reg Reg
- | MULLW_MayOflo Reg Reg Reg
- -- dst = 1 if src1 * src2 overflows
- -- pseudo-instruction; pretty-printed as:
- -- mullwo. dst, src1, src2
- -- mfxer dst
- -- rlwinm dst, dst, 2, 31,31
-
- | AND Reg Reg RI -- dst, src1, src2
- | OR Reg Reg RI -- dst, src1, src2
- | XOR Reg Reg RI -- dst, src1, src2
- | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
-
- | EXTS Size Reg Reg
-
- | NEG Reg Reg
- | NOT Reg Reg
-
- | SLW Reg Reg RI -- shift left word
- | SRW Reg Reg RI -- shift right word
- | SRAW Reg Reg RI -- shift right arithmetic word
-
- -- Rotate Left Word Immediate then AND with Mask
- | RLWINM Reg Reg Int Int Int
-
- | FADD Size Reg Reg Reg
- | FSUB Size Reg Reg Reg
- | FMUL Size Reg Reg Reg
- | FDIV Size Reg Reg Reg
- | FNEG Reg Reg -- negate is the same for single and double prec.
-
- | FCMP Reg Reg
-
- | FCTIWZ Reg Reg -- convert to integer word
- | FRSP Reg Reg -- reduce to single precision
- -- (but destination is a FP register)
-
- | CRNOR Int Int Int -- condition register nor
- | MFCR Reg -- move from condition register
-
- | MFLR Reg -- move from link register
- | FETCHPC Reg -- pseudo-instruction:
- -- bcl to next insn, mflr reg
-
- | LWSYNC -- memory barrier
-#endif /* powerpc_TARGET_ARCH */
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
new file mode 100644
index 0000000000..beb9e15405
--- /dev/null
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -0,0 +1,166 @@
+-----------------------------------------------------------------------------
+--
+-- Machine-dependent assembly language
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-----------------------------------------------------------------------------
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+
+module PPC.Instr (
+ Cond(..),
+ condNegate,
+ RI(..),
+ Instr(..)
+)
+
+where
+
+import BlockId
+import MachRegs
+import Cmm
+import Outputable
+import FastString
+import CLabel
+
+data Cond
+ = ALWAYS
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ deriving Eq
+
+
+condNegate :: Cond -> Cond
+condNegate ALWAYS = panic "condNegate: ALWAYS"
+condNegate EQQ = NE
+condNegate GE = LTT
+condNegate GEU = LU
+condNegate GTT = LE
+condNegate GU = LEU
+condNegate LE = GTT
+condNegate LEU = GU
+condNegate LTT = GE
+condNegate LU = GEU
+condNegate NE = EQQ
+
+
+-- -----------------------------------------------------------------------------
+-- Machine's assembly language
+
+-- We have a few common "instructions" (nearly all the pseudo-ops) but
+-- mostly all of 'Instr' is machine-specific.
+
+-- Register or immediate
+data RI
+ = RIReg Reg
+ | RIImm Imm
+
+data Instr
+ -- comment pseudo-op
+ = COMMENT FastString
+
+ -- some static data spat out during code
+ -- generation. Will be extracted before
+ -- pretty-printing.
+ | LDATA Section [CmmStatic]
+
+ -- start a new basic block. Useful during
+ -- codegen, removed later. Preceding
+ -- instruction should be a jump, as per the
+ -- invariants for a BasicBlock (see Cmm).
+ | NEWBLOCK BlockId
+
+ -- specify current stack offset for
+ -- benefit of subsequent passes
+ | DELTA Int
+
+ -- | spill this reg to a stack slot
+ | SPILL Reg Int
+
+ -- | reload this reg from a stack slot
+ | RELOAD Int Reg
+
+ -- Loads and stores.
+ | LD Size Reg AddrMode -- Load size, dst, src
+ | LA Size Reg AddrMode -- Load arithmetic size, dst, src
+ | ST Size Reg AddrMode -- Store size, src, dst
+ | STU Size Reg AddrMode -- Store with Update size, src, dst
+ | LIS Reg Imm -- Load Immediate Shifted dst, src
+ | LI Reg Imm -- Load Immediate dst, src
+ | MR Reg Reg -- Move Register dst, src -- also for fmr
+
+ | CMP Size Reg RI --- size, src1, src2
+ | CMPL Size Reg RI --- size, src1, src2
+
+ | BCC Cond BlockId
+ | BCCFAR Cond BlockId
+ | JMP CLabel -- same as branch,
+ -- but with CLabel instead of block ID
+ | MTCTR Reg
+ | BCTR [BlockId] -- with list of local destinations
+ | BL CLabel [Reg] -- with list of argument regs
+ | BCTRL [Reg]
+
+ | ADD Reg Reg RI -- dst, src1, src2
+ | ADDC Reg Reg Reg -- (carrying) dst, src1, src2
+ | ADDE Reg Reg Reg -- (extend) dst, src1, src2
+ | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
+ | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
+ | MULLW Reg Reg RI
+ | DIVW Reg Reg Reg
+ | DIVWU Reg Reg Reg
+
+ | MULLW_MayOflo Reg Reg Reg
+ -- dst = 1 if src1 * src2 overflows
+ -- pseudo-instruction; pretty-printed as:
+ -- mullwo. dst, src1, src2
+ -- mfxer dst
+ -- rlwinm dst, dst, 2, 31,31
+
+ | AND Reg Reg RI -- dst, src1, src2
+ | OR Reg Reg RI -- dst, src1, src2
+ | XOR Reg Reg RI -- dst, src1, src2
+ | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
+
+ | EXTS Size Reg Reg
+
+ | NEG Reg Reg
+ | NOT Reg Reg
+
+ | SLW Reg Reg RI -- shift left word
+ | SRW Reg Reg RI -- shift right word
+ | SRAW Reg Reg RI -- shift right arithmetic word
+
+ -- Rotate Left Word Immediate then AND with Mask
+ | RLWINM Reg Reg Int Int Int
+
+ | FADD Size Reg Reg Reg
+ | FSUB Size Reg Reg Reg
+ | FMUL Size Reg Reg Reg
+ | FDIV Size Reg Reg Reg
+ | FNEG Reg Reg -- negate is the same for single and double prec.
+
+ | FCMP Reg Reg
+
+ | FCTIWZ Reg Reg -- convert to integer word
+ | FRSP Reg Reg -- reduce to single precision
+ -- (but destination is a FP register)
+
+ | CRNOR Int Int Int -- condition register nor
+ | MFCR Reg -- move from condition register
+
+ | MFLR Reg -- move from link register
+ | FETCHPC Reg -- pseudo-instruction:
+ -- bcl to next insn, mflr reg
+
+ | LWSYNC -- memory barrier
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
new file mode 100644
index 0000000000..7f782c9a72
--- /dev/null
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -0,0 +1,188 @@
+-----------------------------------------------------------------------------
+--
+-- Machine-dependent assembly language
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-----------------------------------------------------------------------------
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+
+module SPARC.Instr (
+ Cond(..),
+ RI(..),
+ Instr(..),
+ riZero,
+ fpRelEA,
+ moveSp,
+ fPair,
+)
+
+where
+
+import BlockId
+import MachRegs
+import Cmm
+import Outputable
+import Constants ( wORD_SIZE )
+import FastString
+
+import GHC.Exts
+
+
+data Cond
+ = ALWAYS
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | NEVER
+ | POS
+ | VC
+ | VS
+ deriving Eq
+
+-- -----------------------------------------------------------------------------
+-- Machine's assembly language
+
+-- We have a few common "instructions" (nearly all the pseudo-ops) but
+-- mostly all of 'Instr' is machine-specific.
+
+-- Register or immediate
+data RI
+ = RIReg Reg
+ | RIImm Imm
+
+data Instr
+ -- comment pseudo-op
+ = COMMENT FastString
+
+ -- some static data spat out during code
+ -- generation. Will be extracted before
+ -- pretty-printing.
+ | LDATA Section [CmmStatic]
+
+ -- start a new basic block. Useful during
+ -- codegen, removed later. Preceding
+ -- instruction should be a jump, as per the
+ -- invariants for a BasicBlock (see Cmm).
+ | NEWBLOCK BlockId
+
+ -- specify current stack offset for
+ -- benefit of subsequent passes
+ | DELTA Int
+
+ -- | spill this reg to a stack slot
+ | SPILL Reg Int
+
+ -- | reload this reg from a stack slot
+ | RELOAD Int Reg
+
+
+ -- Loads and stores.
+ | LD Size AddrMode Reg -- size, src, dst
+ | ST Size Reg AddrMode -- size, src, dst
+
+ -- Int Arithmetic.
+ | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
+ | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
+
+ | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst
+ | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst
+
+
+ -- The SPARC divide instructions perform 64bit by 32bit division
+ -- The Y register is xored into the first operand.
+
+ -- On _some implementations_ the Y register is overwritten by
+ -- the remainder, so we have to make sure it is 0 each time.
+
+ -- dst <- ((Y `shiftL` 32) `or` src1) `div` src2
+ | UDIV Bool Reg RI Reg -- cc?, src1, src2, dst
+ | SDIV Bool Reg RI Reg -- cc?, src1, src2, dst
+
+ | RDY Reg -- move contents of Y register to reg
+ | WRY Reg Reg -- Y <- src1 `xor` src2
+
+ -- Simple bit-twiddling.
+ | AND Bool Reg RI Reg -- cc?, src1, src2, dst
+ | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
+ | OR Bool Reg RI Reg -- cc?, src1, src2, dst
+ | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
+ | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
+ | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
+ | SLL Reg RI Reg -- src1, src2, dst
+ | SRL Reg RI Reg -- src1, src2, dst
+ | SRA Reg RI Reg -- src1, src2, dst
+ | SETHI Imm Reg -- src, dst
+ | NOP -- Really SETHI 0, %g0, but worth an alias
+
+ -- Float Arithmetic.
+ -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
+ -- instructions right up until we spit them out.
+ | FABS Size Reg Reg -- src dst
+ | FADD Size Reg Reg Reg -- src1, src2, dst
+ | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst
+ | FDIV Size Reg Reg Reg -- src1, src2, dst
+ | FMOV Size Reg Reg -- src, dst
+ | FMUL Size Reg Reg Reg -- src1, src2, dst
+ | FNEG Size Reg Reg -- src, dst
+ | FSQRT Size Reg Reg -- src, dst
+ | FSUB Size Reg Reg Reg -- src1, src2, dst
+ | FxTOy Size Size Reg Reg -- src, dst
+
+ -- Jumping around.
+ | BI Cond Bool BlockId -- cond, annul?, target
+ | BF Cond Bool BlockId -- cond, annul?, target
+
+ | JMP AddrMode -- target
+
+ -- With a tabled jump we know all the possible destinations. Tabled
+ -- jump includes its list of destinations so we can work out what regs
+ -- are live across the jump.
+ --
+ | JMP_TBL AddrMode [BlockId]
+
+ | CALL (Either Imm Reg) Int Bool -- target, args, terminal
+
+
+riZero :: RI -> Bool
+riZero (RIImm (ImmInt 0)) = True
+riZero (RIImm (ImmInteger 0)) = True
+riZero (RIReg (RealReg 0)) = True
+riZero _ = False
+
+
+-- | Calculate the effective address which would be used by the
+-- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
+-- alas -- can't have fpRelEA here because of module dependencies.
+fpRelEA :: Int -> Reg -> Instr
+fpRelEA n dst
+ = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+
+
+-- | Code to shift the stack pointer by n words.
+moveSp :: Int -> Instr
+moveSp n
+ = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+
+
+-- | Produce the second-half-of-a-double register given the first half.
+fPair :: Reg -> Maybe Reg
+fPair (RealReg n)
+ | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1))
+
+fPair (VirtualRegD u)
+ = Just (VirtualRegHi u)
+
+fPair other
+ = trace ("MachInstrs.fPair: can't get high half of supposed double reg " ++ show other)
+ Nothing
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
new file mode 100644
index 0000000000..0944e9265a
--- /dev/null
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -0,0 +1,335 @@
+-----------------------------------------------------------------------------
+--
+-- Machine-dependent assembly language
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-----------------------------------------------------------------------------
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+
+module X86.Instr
+where
+
+import BlockId
+import MachRegs
+import Cmm
+import FastString
+
+data Cond
+ = ALWAYS -- What's really used? ToDo
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | POS
+ | CARRY
+ | OFLO
+ | PARITY
+ | NOTPARITY
+
+
+
+-- -----------------------------------------------------------------------------
+-- Intel x86 instructions
+
+{-
+Intel, in their infinite wisdom, selected a stack model for floating
+point registers on x86. That might have made sense back in 1979 --
+nowadays we can see it for the nonsense it really is. A stack model
+fits poorly with the existing nativeGen infrastructure, which assumes
+flat integer and FP register sets. Prior to this commit, nativeGen
+could not generate correct x86 FP code -- to do so would have meant
+somehow working the register-stack paradigm into the register
+allocator and spiller, which sounds very difficult.
+
+We have decided to cheat, and go for a simple fix which requires no
+infrastructure modifications, at the expense of generating ropey but
+correct FP code. All notions of the x86 FP stack and its insns have
+been removed. Instead, we pretend (to the instruction selector and
+register allocator) that x86 has six floating point registers, %fake0
+.. %fake5, which can be used in the usual flat manner. We further
+claim that x86 has floating point instructions very similar to SPARC
+and Alpha, that is, a simple 3-operand register-register arrangement.
+Code generation and register allocation proceed on this basis.
+
+When we come to print out the final assembly, our convenient fiction
+is converted to dismal reality. Each fake instruction is
+independently converted to a series of real x86 instructions.
+%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
+arithmetic operations, the two operands are pushed onto the top of the
+FP stack, the operation done, and the result copied back into the
+relevant register. There are only six %fake registers because 2 are
+needed for the translation, and x86 has 8 in total.
+
+The translation is inefficient but is simple and it works. A cleverer
+translation would handle a sequence of insns, simulating the FP stack
+contents, would not impose a fixed mapping from %fake to %st regs, and
+hopefully could avoid most of the redundant reg-reg moves of the
+current translation.
+
+We might as well make use of whatever unique FP facilities Intel have
+chosen to bless us with (let's not be churlish, after all).
+Hence GLDZ and GLD1. Bwahahahahahahaha!
+-}
+
+{-
+MORE FLOATING POINT MUSINGS...
+
+Intel's internal floating point registers are by default 80 bit
+extended precision. This means that all operations done on values in
+registers are done at 80 bits, and unless the intermediate values are
+truncated to the appropriate size (32 or 64 bits) by storing in
+memory, calculations in registers will give different results from
+calculations which pass intermediate values in memory (eg. via
+function calls).
+
+One solution is to set the FPU into 64 bit precision mode. Some OSs
+do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
+that this will only affect 64-bit precision arithmetic; 32-bit
+calculations will still be done at 64-bit precision in registers. So
+it doesn't solve the whole problem.
+
+There's also the issue of what the C library is expecting in terms of
+precision. It seems to be the case that glibc on Linux expects the
+FPU to be set to 80 bit precision, so setting it to 64 bit could have
+unexpected effects. Changing the default could have undesirable
+effects on other 3rd-party library code too, so the right thing would
+be to save/restore the FPU control word across Haskell code if we were
+to do this.
+
+gcc's -ffloat-store gives consistent results by always storing the
+results of floating-point calculations in memory, which works for both
+32 and 64-bit precision. However, it only affects the values of
+user-declared floating point variables in C, not intermediate results.
+GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
+flag).
+
+Another problem is how to spill floating point registers in the
+register allocator. Should we spill the whole 80 bits, or just 64?
+On an OS which is set to 64 bit precision, spilling 64 is fine. On
+Linux, spilling 64 bits will round the results of some operations.
+This is what gcc does. Spilling at 80 bits requires taking up a full
+128 bit slot (so we get alignment). We spill at 80-bits and ignore
+the alignment problems.
+
+In the future, we'll use the SSE registers for floating point. This
+requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
+precision float ops), which means P4 or Xeon and above. Using SSE
+will solve all these problems, because the SSE registers use fixed 32
+bit or 64 bit precision.
+
+--SDM 1/2003
+-}
+
+
+data Instr
+ -- comment pseudo-op
+ = COMMENT FastString
+
+ -- some static data spat out during code
+ -- generation. Will be extracted before
+ -- pretty-printing.
+ | LDATA Section [CmmStatic]
+
+ -- start a new basic block. Useful during
+ -- codegen, removed later. Preceding
+ -- instruction should be a jump, as per the
+ -- invariants for a BasicBlock (see Cmm).
+ | NEWBLOCK BlockId
+
+ -- specify current stack offset for
+ -- benefit of subsequent passes
+ | DELTA Int
+
+ -- | spill this reg to a stack slot
+ | SPILL Reg Int
+
+ -- | reload this reg from a stack slot
+ | RELOAD Int Reg
+
+
+ -- Moves.
+ | MOV Size Operand Operand
+ | MOVZxL Size Operand Operand -- size is the size of operand 1
+ | MOVSxL Size Operand Operand -- size is the size of operand 1
+ -- x86_64 note: plain mov into a 32-bit register always zero-extends
+ -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
+ -- don't affect the high bits of the register.
+
+ -- Load effective address (also a very useful three-operand add instruction :-)
+ | LEA Size Operand Operand
+
+ -- Int Arithmetic.
+ | ADD Size Operand Operand
+ | ADC Size Operand Operand
+ | SUB Size Operand Operand
+
+ | MUL Size Operand Operand
+ | IMUL Size Operand Operand -- signed int mul
+ | IMUL2 Size Operand -- %edx:%eax = operand * %eax
+
+ | DIV Size Operand -- eax := eax:edx/op, edx := eax:edx%op
+ | IDIV Size Operand -- ditto, but signed
+
+ -- Simple bit-twiddling.
+ | AND Size Operand Operand
+ | OR Size Operand Operand
+ | XOR Size Operand Operand
+ | NOT Size Operand
+ | NEGI Size Operand -- NEG instruction (name clash with Cond)
+
+ -- Shifts (amount may be immediate or %cl only)
+ | SHL Size Operand{-amount-} Operand
+ | SAR Size Operand{-amount-} Operand
+ | SHR Size Operand{-amount-} Operand
+
+ | BT Size Imm Operand
+ | NOP
+
+#if i386_TARGET_ARCH
+ -- Float Arithmetic.
+
+ -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
+ -- as single instructions right up until we spit them out.
+ -- all the 3-operand fake fp insns are src1 src2 dst
+ -- and furthermore are constrained to be fp regs only.
+ -- IMPORTANT: keep is_G_insn up to date with any changes here
+ | GMOV Reg Reg -- src(fpreg), dst(fpreg)
+ | GLD Size AddrMode Reg -- src, dst(fpreg)
+ | GST Size Reg AddrMode -- src(fpreg), dst
+
+ | GLDZ Reg -- dst(fpreg)
+ | GLD1 Reg -- dst(fpreg)
+
+ | GFTOI Reg Reg -- src(fpreg), dst(intreg)
+ | GDTOI Reg Reg -- src(fpreg), dst(intreg)
+
+ | GITOF Reg Reg -- src(intreg), dst(fpreg)
+ | GITOD Reg Reg -- src(intreg), dst(fpreg)
+
+ | GADD Size Reg Reg Reg -- src1, src2, dst
+ | GDIV Size Reg Reg Reg -- src1, src2, dst
+ | GSUB Size Reg Reg Reg -- src1, src2, dst
+ | GMUL Size Reg Reg Reg -- src1, src2, dst
+
+ -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
+ -- Compare src1 with src2; set the Zero flag iff the numbers are
+ -- comparable and the comparison is True. Subsequent code must
+ -- test the %eflags zero flag regardless of the supplied Cond.
+ | GCMP Cond Reg Reg -- src1, src2
+
+ | GABS Size Reg Reg -- src, dst
+ | GNEG Size Reg Reg -- src, dst
+ | GSQRT Size Reg Reg -- src, dst
+ | GSIN Size CLabel CLabel Reg Reg -- src, dst
+ | GCOS Size CLabel CLabel Reg Reg -- src, dst
+ | GTAN Size CLabel CLabel Reg Reg -- src, dst
+
+ | GFREE -- do ffree on all x86 regs; an ugly hack
+#endif
+
+#if x86_64_TARGET_ARCH
+-- SSE2 floating point: we use a restricted set of the available SSE2
+-- instructions for floating-point.
+
+ -- use MOV for moving (either movss or movsd (movlpd better?))
+
+ | CVTSS2SD Reg Reg -- F32 to F64
+ | CVTSD2SS Reg Reg -- F64 to F32
+ | CVTTSS2SIQ Operand Reg -- F32 to I32/I64 (with truncation)
+ | CVTTSD2SIQ Operand Reg -- F64 to I32/I64 (with truncation)
+ | CVTSI2SS Operand Reg -- I32/I64 to F32
+ | CVTSI2SD Operand Reg -- I32/I64 to F64
+
+ -- use ADD & SUB for arithmetic. In both cases, operands
+ -- are Operand Reg.
+
+ -- SSE2 floating-point division:
+ | FDIV Size Operand Operand -- divisor, dividend(dst)
+
+ -- use CMP for comparisons. ucomiss and ucomisd instructions
+ -- compare single/double prec floating point respectively.
+
+ | SQRT Size Operand Reg -- src, dst
+#endif
+
+ -- Comparison
+ | TEST Size Operand Operand
+ | CMP Size Operand Operand
+ | SETCC Cond Operand
+
+ -- Stack Operations.
+ | PUSH Size Operand
+ | POP Size Operand
+ -- both unused (SDM):
+ -- | PUSHA
+ -- | POPA
+
+ -- Jumping around.
+ | JMP Operand
+ | JXX Cond BlockId -- includes unconditional branches
+ | JXX_GBL Cond Imm -- non-local version of JXX
+ | JMP_TBL Operand [BlockId] -- table jump
+ | CALL (Either Imm Reg) [Reg]
+
+ -- Other things.
+ | CLTD Size -- sign extend %eax into %edx:%eax
+
+ | FETCHGOT Reg -- pseudo-insn for ELF position-independent code
+ -- pretty-prints as
+ -- call 1f
+ -- 1: popl %reg
+ -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
+ | FETCHPC Reg -- pseudo-insn for Darwin position-independent code
+ -- pretty-prints as
+ -- call 1f
+ -- 1: popl %reg
+
+
+data Operand
+ = OpReg Reg -- register
+ | OpImm Imm -- immediate value
+ | OpAddr AddrMode -- memory reference
+
+
+#if i386_TARGET_ARCH
+i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr]
+i386_insert_ffrees blocks
+ | or (map (any is_G_instr) [ instrs | BasicBlock id instrs <- blocks ])
+ = map ffree_before_nonlocal_transfers blocks
+ | otherwise
+ = blocks
+ where
+ ffree_before_nonlocal_transfers (BasicBlock id insns)
+ = BasicBlock id (foldr p [] insns)
+ where p insn r = case insn of
+ CALL _ _ -> GFREE : insn : r
+ JMP _ -> GFREE : insn : r
+ other -> insn : r
+
+-- if you ever add a new FP insn to the fake x86 FP insn set,
+-- you must update this too
+is_G_instr :: Instr -> Bool
+is_G_instr instr
+ = case instr of
+ GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
+ GLDZ _ -> True; GLD1 _ -> True
+ GFTOI _ _ -> True; GDTOI _ _ -> True
+ GITOF _ _ -> True; GITOD _ _ -> True
+ GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
+ GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
+ GCMP _ _ _ -> True; GABS _ _ _ -> True
+ GNEG _ _ _ -> True; GSQRT _ _ _ -> True
+ GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True
+ GFREE -> panic "is_G_instr: GFREE (!)"
+ other -> False
+#endif /* i386_TARGET_ARCH */