summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2017-08-27 00:16:59 -0400
committerBen Gamari <ben@smart-cactus.org>2019-06-26 23:29:47 -0400
commit167a44935c50faac68208ff8bbfec7a4e8c0ae6c (patch)
tree7b4c932683ea225beaf90521d28c442977186874
parentf509f72b909b6c2c2f50518664c474d880fd36ff (diff)
downloadhaskell-wip/ncg-simd.tar.gz
-rw-r--r--compiler/cmm/CmmType.hs1
-rw-r--r--compiler/nativeGen/Reg.hs28
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs31
-rw-r--r--compiler/nativeGen/X86/Instr.hs20
-rw-r--r--compiler/nativeGen/X86/Ppr.hs9
-rw-r--r--testsuite/tests/simd/Simd1.hs7
-rw-r--r--testsuite/tests/simd/all.T1
7 files changed, 80 insertions, 17 deletions
diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs
index 43d23c7ee7..1ab96026ca 100644
--- a/compiler/cmm/CmmType.hs
+++ b/compiler/cmm/CmmType.hs
@@ -273,6 +273,7 @@ narrowS _ _ = panic "narrowTo"
-- SIMD
-----------------------------------------------------------------------------
+-- | SIMD vector length
type Length = Int
vec :: Length -> CmmType -> CmmType
diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs
index 7f69ea01a4..e0c0176a75 100644
--- a/compiler/nativeGen/Reg.hs
+++ b/compiler/nativeGen/Reg.hs
@@ -52,11 +52,11 @@ type RegNo
-- Virtual regs can be of either class, so that info is attached.
--
data VirtualReg
- = VirtualRegI {-# UNPACK #-} !Unique
- | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
- | VirtualRegF {-# UNPACK #-} !Unique
- | VirtualRegD {-# UNPACK #-} !Unique
-
+ = VirtualRegI {-# UNPACK #-} !Unique
+ | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
+ | VirtualRegF {-# UNPACK #-} !Unique
+ | VirtualRegD {-# UNPACK #-} !Unique
+ | VirtualRegSSE {-# UNPACK #-} !Unique
deriving (Eq, Show)
-- This is laborious, but necessary. We can't derive Ord because
@@ -69,14 +69,15 @@ instance Ord VirtualReg where
compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b
compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b
compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b
-
+ compare (VirtualRegSSE a) (VirtualRegSSE b) = nonDetCmpUnique a b
compare VirtualRegI{} _ = LT
compare _ VirtualRegI{} = GT
compare VirtualRegHi{} _ = LT
compare _ VirtualRegHi{} = GT
compare VirtualRegF{} _ = LT
compare _ VirtualRegF{} = GT
-
+ compare VirtualRegD{} _ = LT
+ compare _ VirtualRegD{} = GT
instance Uniquable VirtualReg where
@@ -86,18 +87,16 @@ instance Uniquable VirtualReg where
VirtualRegHi u -> u
VirtualRegF u -> u
VirtualRegD u -> u
+ VirtualRegSSE u -> u
instance Outputable VirtualReg where
ppr reg
= case reg of
VirtualRegI u -> text "%vI_" <> pprUniqueAlways u
VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
- -- this code is kinda wrong on x86
- -- because float and double occupy the same register set
- -- namely SSE2 register xmm0 .. xmm15
- VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u
- VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u
-
+ VirtualRegF u -> text "%vF_" <> pprUniqueAlways u
+ VirtualRegD u -> text "%vD_" <> pprUniqueAlways u
+ VirtualRegSSE u -> text "%vSSE_" <> pprUniqueAlways u
renameVirtualReg :: Unique -> VirtualReg -> VirtualReg
@@ -107,6 +106,7 @@ renameVirtualReg u r
VirtualRegHi _ -> VirtualRegHi u
VirtualRegF _ -> VirtualRegF u
VirtualRegD _ -> VirtualRegD u
+ VirtualRegSSE _ -> VirtualRegSSE u
classOfVirtualReg :: VirtualReg -> RegClass
@@ -116,7 +116,7 @@ classOfVirtualReg vr
VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloat
VirtualRegD{} -> RcDouble
-
+ VirtualRegSSE{} -> RcDoubleSSE
-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 73cfb28d46..fe6d4d0499 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -284,11 +284,13 @@ data ChildCode64
data Register
= Fixed Format Reg InstrBlock
| Any Format (Reg -> InstrBlock)
+ | AnyV (Reg -> InstrBlock)
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn) format = Any format codefn
+swizzleRegisterRep r@(AnyV{}) _ = r
-- | Grab the Reg for a CmmReg
@@ -368,6 +370,9 @@ getSomeReg expr = do
Any rep code -> do
tmp <- getNewRegNat rep
return (tmp, code tmp)
+ AnyV code -> do
+ tmp <- getVectorReg
+ return (tmp, code tmp)
Fixed _ reg code ->
return (reg, code)
@@ -484,6 +489,16 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
r_dst_lo
)
+iselExpr64 (CmmMachOp (MO_V_Add len width) [expr1, expr2]) = do
+ r_dst <- getVectorReg
+ ChildCode64 code1 r1 <- iselExpr64 expr1
+ ChildCode64 code2 r2 <- iselExpr64 expr2
+ let fmt = VecFormat len width FmtInt
+ return $ ChildCode64 (code1 `appOL` code2 `appOL`
+ toOL [ V_MOV fmt (OpReg r_dst) (OpReg r1)
+ , V_ADD (VecFormat len width FmtInt) (OpReg r2) (OpReg r_dst)
+ ]) r_dst
+
iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
fn <- getAnyReg expr
r_dst_lo <- getNewRegNat II32
@@ -501,6 +516,8 @@ iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
iselExpr64 expr
= pprPanic "iselExpr64(i386)" (ppr expr)
+getVectorReg :: NatM Reg
+getVectorReg = RegVirtual . VirtualRegSSE <$> getUniqueM
--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
@@ -822,7 +839,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
- MO_V_Add {} -> needLlvm
+ MO_V_Add {} -> triv_op W512 V_ADD
MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm
@@ -985,6 +1002,12 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
getRegister' _ _ (CmmLoad mem pk)
+ | isVecType pk
+ = do
+ code <- vecLoadCode (V_MOV format) mem
+ return (AnyV code)
+
+getRegister' _ _ (CmmLoad mem pk)
| isFloatType pk
= do
Amode addr mem_code <- getAmode mem
@@ -1096,6 +1119,9 @@ getByteReg expr = do
-- ToDo: could optimise slightly by checking for
-- byte-addressable real registers, but that will
-- happen very rarely if at all.
+ AnyV {} -> do
+ tmp <- getVectorReg
+ return (tmp, code tmp)
else getSomeReg expr -- all regs are byte-addressable on x86_64
-- Another variant: this time we want the result in a register that cannot
@@ -1116,6 +1142,9 @@ getNonClobberedReg expr = do
return (tmp, code `snocOL` reg2reg rep reg tmp)
| otherwise ->
return (reg, code)
+ AnyV code -> do
+ tmp <- getVectorReg
+ return (tmp, code tmp)
reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst = MOV format (OpReg src) (OpReg dst)
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 6e5d656beb..e59594365b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -8,8 +8,9 @@
--
-----------------------------------------------------------------------------
-module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..),
- getJumpDestBlockId, canShortcut, shortcutStatics,
+module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..),
+ VecFormat(..), ScalarFormat(..),
+ JumpDest(..), getJumpDestBlockId, canShortcut, shortcutStatics,
shortcutJump, allocMoreStack,
maxSpillSlots, archWordFormat )
where
@@ -71,6 +72,19 @@ instance Instruction Instr where
mkStackAllocInstr = x86_mkStackAllocInstr
mkStackDeallocInstr = x86_mkStackDeallocInstr
+data VecFormat = VecFormat { vecLength :: !Length
+ , vecWidth :: !Width
+ , vecScalar :: !ScalarFormat
+ }
+
+instance Outputable VecFormat where
+ ppr (VecFormat l w f) = ppr (l,w,f)
+
+data ScalarFormat = FmtFloat | FmtInt
+
+instance Outputable ScalarFormat where
+ ppr FmtFloat = text "float"
+ ppr FmtInt = text "int"
-- -----------------------------------------------------------------------------
-- Intel x86 instructions
@@ -193,6 +207,7 @@ data Instr
-- Moves.
| MOV Format Operand Operand
+ | V_MOV VecFormat Operand Operand
| CMOV Cond Format Operand Reg
| MOVZxL Format Operand Operand -- format is the size of operand 1
| MOVSxL Format Operand Operand -- format is the size of operand 1
@@ -205,6 +220,7 @@ data Instr
-- Int Arithmetic.
| ADD Format Operand Operand
+ | V_ADD VecFormat Operand Operand
| ADC Format Operand Operand
| SUB Format Operand Operand
| SBB Format Operand Operand
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 095d9eba7c..7a799cfabd 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -833,6 +833,15 @@ pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst
pprInstr (CMPXCHG format src dst)
= pprFormatOpOp (sLit "cmpxchg") format src dst
+-- SIMD
+pprInstr (V_ADD format src dst)
+ | VecFormat _ W32 FmtFloat <- format = pprVecFormatOpOp (sLit "addps") format src dst
+ | VecFormat _ W64 FmtFloat <- format = pprVecFormatOpOp (sLit "addpd") format src dst
+ | VecFormat _ W64 FmtInt <- format = pprVecFormatOpOp (sLit "addpd") format src dst
+pprInstr (V_MOV format src dst)
+ | VecFormat _ W32 FmtFloat <- format = pprVecFormatOpOp (sLit "movaps") format src dst
+ | VecFormat _ W64 FmtFloat <- format = pprVecFormatOpOp (sLit "movapd") format src dst
+ | VecFormat _ _ FmtInt <- format = pprVecFormatOpOp (sLit "movdqa") format src dst
--------------------------
diff --git a/testsuite/tests/simd/Simd1.hs b/testsuite/tests/simd/Simd1.hs
new file mode 100644
index 0000000000..55e68bda40
--- /dev/null
+++ b/testsuite/tests/simd/Simd1.hs
@@ -0,0 +1,7 @@
+{-# LANGUAGE MagicHash #-}
+
+import GHC.Exts
+
+main :: IO ()
+main = do
+
diff --git a/testsuite/tests/simd/all.T b/testsuite/tests/simd/all.T
new file mode 100644
index 0000000000..e4941a44a0
--- /dev/null
+++ b/testsuite/tests/simd/all.T
@@ -0,0 +1 @@
+test('Simd1', normal, compile_and_run, [''])