summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorSylvain Henry <sylvain@haskus.fr>2022-02-18 12:44:52 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-02-23 13:59:23 -0500
commitbc8de322d310d9dc879d3d3e7e0aa9157d8c2cf5 (patch)
treeb33660f0b9c248d83e2dea1d599f75716c9da549 /compiler
parent6fa7591e832d71ebea452ee6ddf97ac513404576 (diff)
downloadhaskell-bc8de322d310d9dc879d3d3e7e0aa9157d8c2cf5.tar.gz
NCG: inline some 64-bit primops on x86/32-bit (#5444)
Several 64-bit operation were implemented with FFI calls on 32-bit architectures but we can easily implement them with inline assembly code. Also remove unused hs_int64ToWord64 and hs_word64ToInt64 C functions.
Diffstat (limited to 'compiler')
-rw-r--r--compiler/GHC/CmmToAsm/X86/CodeGen.hs277
-rw-r--r--compiler/GHC/CmmToAsm/X86/Cond.hs32
2 files changed, 272 insertions, 37 deletions
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index 74c16383f0..0d67f306dc 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -98,6 +98,12 @@ is32BitPlatform = do
platform <- getPlatform
return $ target32Bit platform
+expect32BitPlatform :: SDoc -> NatM ()
+expect32BitPlatform doc = do
+ is32Bit <- is32BitPlatform
+ when (not is32Bit) $
+ pprPanic "Expecting 32-bit platform" doc
+
sse2Enabled :: NatM Bool
sse2Enabled = do
config <- getConfig
@@ -2165,35 +2171,35 @@ genSimplePrim bid MO_F64_Acosh [dst] [src] = genLibCCall bid
genSimplePrim bid MO_F64_Atanh [dst] [src] = genLibCCall bid (fsLit "atanh") [dst] [src]
genSimplePrim bid MO_SuspendThread [tok] [rs,i] = genRTSCCall bid (fsLit "suspendThread") [tok] [rs,i]
genSimplePrim bid MO_ResumeThread [rs] [tok] = genRTSCCall bid (fsLit "resumeThread") [rs] [tok]
-genSimplePrim bid MO_I64_ToI [dst] [src] = genPrimCCall bid (fsLit "hs_int64ToInt") [dst] [src]
-genSimplePrim bid MO_I64_FromI [dst] [src] = genPrimCCall bid (fsLit "hs_intToInt64") [dst] [src]
-genSimplePrim bid MO_W64_ToW [dst] [src] = genPrimCCall bid (fsLit "hs_word64ToWord") [dst] [src]
-genSimplePrim bid MO_W64_FromW [dst] [src] = genPrimCCall bid (fsLit "hs_wordToWord64") [dst] [src]
-genSimplePrim bid MO_x64_Neg [dst] [src] = genPrimCCall bid (fsLit "hs_neg64") [dst] [src]
-genSimplePrim bid MO_x64_Add [dst] [x,y] = genPrimCCall bid (fsLit "hs_add64") [dst] [x,y]
-genSimplePrim bid MO_x64_Sub [dst] [x,y] = genPrimCCall bid (fsLit "hs_sub64") [dst] [x,y]
+genSimplePrim _ MO_I64_ToI [dst] [src] = genInt64ToInt dst src
+genSimplePrim _ MO_I64_FromI [dst] [src] = genIntToInt64 dst src
+genSimplePrim _ MO_W64_ToW [dst] [src] = genWord64ToWord dst src
+genSimplePrim _ MO_W64_FromW [dst] [src] = genWordToWord64 dst src
+genSimplePrim _ MO_x64_Neg [dst] [src] = genNeg64 dst src
+genSimplePrim _ MO_x64_Add [dst] [x,y] = genAdd64 dst x y
+genSimplePrim _ MO_x64_Sub [dst] [x,y] = genSub64 dst x y
genSimplePrim bid MO_x64_Mul [dst] [x,y] = genPrimCCall bid (fsLit "hs_mul64") [dst] [x,y]
genSimplePrim bid MO_I64_Quot [dst] [x,y] = genPrimCCall bid (fsLit "hs_quotInt64") [dst] [x,y]
genSimplePrim bid MO_I64_Rem [dst] [x,y] = genPrimCCall bid (fsLit "hs_remInt64") [dst] [x,y]
genSimplePrim bid MO_W64_Quot [dst] [x,y] = genPrimCCall bid (fsLit "hs_quotWord64") [dst] [x,y]
genSimplePrim bid MO_W64_Rem [dst] [x,y] = genPrimCCall bid (fsLit "hs_remWord64") [dst] [x,y]
-genSimplePrim bid MO_x64_And [dst] [x,y] = genPrimCCall bid (fsLit "hs_and64") [dst] [x,y]
-genSimplePrim bid MO_x64_Or [dst] [x,y] = genPrimCCall bid (fsLit "hs_or64") [dst] [x,y]
-genSimplePrim bid MO_x64_Xor [dst] [x,y] = genPrimCCall bid (fsLit "hs_xor64") [dst] [x,y]
-genSimplePrim bid MO_x64_Not [dst] [src] = genPrimCCall bid (fsLit "hs_not64") [dst] [src]
+genSimplePrim _ MO_x64_And [dst] [x,y] = genAnd64 dst x y
+genSimplePrim _ MO_x64_Or [dst] [x,y] = genOr64 dst x y
+genSimplePrim _ MO_x64_Xor [dst] [x,y] = genXor64 dst x y
+genSimplePrim _ MO_x64_Not [dst] [src] = genNot64 dst src
genSimplePrim bid MO_x64_Shl [dst] [x,n] = genPrimCCall bid (fsLit "hs_uncheckedShiftL64") [dst] [x,n]
genSimplePrim bid MO_I64_Shr [dst] [x,n] = genPrimCCall bid (fsLit "hs_uncheckedIShiftRA64") [dst] [x,n]
genSimplePrim bid MO_W64_Shr [dst] [x,n] = genPrimCCall bid (fsLit "hs_uncheckedShiftRL64") [dst] [x,n]
-genSimplePrim bid MO_x64_Eq [dst] [x,y] = genPrimCCall bid (fsLit "hs_eq64") [dst] [x,y]
-genSimplePrim bid MO_x64_Ne [dst] [x,y] = genPrimCCall bid (fsLit "hs_ne64") [dst] [x,y]
-genSimplePrim bid MO_I64_Ge [dst] [x,y] = genPrimCCall bid (fsLit "hs_geInt64") [dst] [x,y]
-genSimplePrim bid MO_I64_Gt [dst] [x,y] = genPrimCCall bid (fsLit "hs_gtInt64") [dst] [x,y]
-genSimplePrim bid MO_I64_Le [dst] [x,y] = genPrimCCall bid (fsLit "hs_leInt64") [dst] [x,y]
-genSimplePrim bid MO_I64_Lt [dst] [x,y] = genPrimCCall bid (fsLit "hs_ltInt64") [dst] [x,y]
-genSimplePrim bid MO_W64_Ge [dst] [x,y] = genPrimCCall bid (fsLit "hs_geWord64") [dst] [x,y]
-genSimplePrim bid MO_W64_Gt [dst] [x,y] = genPrimCCall bid (fsLit "hs_gtWord64") [dst] [x,y]
-genSimplePrim bid MO_W64_Le [dst] [x,y] = genPrimCCall bid (fsLit "hs_leWord64") [dst] [x,y]
-genSimplePrim bid MO_W64_Lt [dst] [x,y] = genPrimCCall bid (fsLit "hs_ltWord64") [dst] [x,y]
+genSimplePrim _ MO_x64_Eq [dst] [x,y] = genEq64 dst x y
+genSimplePrim _ MO_x64_Ne [dst] [x,y] = genNe64 dst x y
+genSimplePrim _ MO_I64_Ge [dst] [x,y] = genGeInt64 dst x y
+genSimplePrim _ MO_I64_Gt [dst] [x,y] = genGtInt64 dst x y
+genSimplePrim _ MO_I64_Le [dst] [x,y] = genLeInt64 dst x y
+genSimplePrim _ MO_I64_Lt [dst] [x,y] = genLtInt64 dst x y
+genSimplePrim _ MO_W64_Ge [dst] [x,y] = genGeWord64 dst x y
+genSimplePrim _ MO_W64_Gt [dst] [x,y] = genGtWord64 dst x y
+genSimplePrim _ MO_W64_Le [dst] [x,y] = genLeWord64 dst x y
+genSimplePrim _ MO_W64_Lt [dst] [x,y] = genLtWord64 dst x y
genSimplePrim _ op dst args = do
platform <- ncgPlatform <$> getConfig
pprPanic "genSimplePrim: unhandled primop" (ppr (pprCallishMachOp op, dst, fmap (pdoc platform) args))
@@ -4063,3 +4069,232 @@ genQuotRem width signed res_q res_r m_arg_x_high arg_x_low arg_y = do
toOL [instr format y_reg,
MOV format (OpReg rax) (OpReg reg_q),
MOV format (OpReg rdx) (OpReg reg_r)]
+
+
+----------------------------------------------------------------------------
+-- The following functions implement certain 64-bit MachOps inline for 32-bit
+-- architectures. On 64-bit architectures, those MachOps aren't supported and
+-- calling these functions for a 64-bit target platform is considered an error
+-- (hence the use of `expect32BitPlatform`).
+--
+-- On 64-bit platforms, generic MachOps should be used instead of these 64-bit
+-- specific ones (e.g. use MO_Add instead of MO_x64_Add). This MachOp selection
+-- is done by StgToCmm.
+
+genInt64ToInt :: LocalReg -> CmmExpr -> NatM InstrBlock
+genInt64ToInt dst src = do
+ expect32BitPlatform (text "genInt64ToInt")
+ RegCode64 code _src_hi src_lo <- iselExpr64 src
+ let dst_r = getLocalRegReg dst
+ pure $ code `snocOL` MOV II32 (OpReg src_lo) (OpReg dst_r)
+
+genWord64ToWord :: LocalReg -> CmmExpr -> NatM InstrBlock
+genWord64ToWord dst src = do
+ expect32BitPlatform (text "genWord64ToWord")
+ RegCode64 code _src_hi src_lo <- iselExpr64 src
+ let dst_r = getLocalRegReg dst
+ pure $ code `snocOL` MOV II32 (OpReg src_lo) (OpReg dst_r)
+
+genIntToInt64 :: LocalReg -> CmmExpr -> NatM InstrBlock
+genIntToInt64 dst src = do
+ expect32BitPlatform (text "genIntToInt64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ src_code <- getAnyReg src
+ pure $ src_code rax `appOL` toOL
+ [ CLTD II32 -- sign extend EAX in EDX:EAX
+ , MOV II32 (OpReg rax) (OpReg dst_lo)
+ , MOV II32 (OpReg rdx) (OpReg dst_hi)
+ ]
+
+genWordToWord64 :: LocalReg -> CmmExpr -> NatM InstrBlock
+genWordToWord64 dst src = do
+ expect32BitPlatform (text "genWordToWord64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ src_code <- getAnyReg src
+ pure $ src_code dst_lo
+ `snocOL` XOR II32 (OpReg dst_hi) (OpReg dst_hi)
+
+genNeg64 :: LocalReg -> CmmExpr -> NatM InstrBlock
+genNeg64 dst src = do
+ expect32BitPlatform (text "genNeg64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ RegCode64 code src_hi src_lo <- iselExpr64 src
+ pure $ code `appOL` toOL
+ [ MOV II32 (OpReg src_lo) (OpReg dst_lo)
+ , MOV II32 (OpReg src_hi) (OpReg dst_hi)
+ , NEGI II32 (OpReg dst_lo)
+ , ADC II32 (OpImm (ImmInt 0)) (OpReg dst_hi)
+ , NEGI II32 (OpReg dst_hi)
+ ]
+
+genAdd64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genAdd64 dst x y = do
+ expect32BitPlatform (text "genAdd64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ RegCode64 x_code x_hi x_lo <- iselExpr64 x
+ RegCode64 y_code y_hi y_lo <- iselExpr64 y
+ pure $ x_code `appOL` y_code `appOL` toOL
+ [ MOV II32 (OpReg x_lo) (OpReg dst_lo)
+ , MOV II32 (OpReg x_hi) (OpReg dst_hi)
+ , ADD II32 (OpReg y_lo) (OpReg dst_lo)
+ , ADC II32 (OpReg y_hi) (OpReg dst_hi)
+ ]
+
+genSub64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genSub64 dst x y = do
+ expect32BitPlatform (text "genSub64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ RegCode64 x_code x_hi x_lo <- iselExpr64 x
+ RegCode64 y_code y_hi y_lo <- iselExpr64 y
+ pure $ x_code `appOL` y_code `appOL` toOL
+ [ MOV II32 (OpReg x_lo) (OpReg dst_lo)
+ , MOV II32 (OpReg x_hi) (OpReg dst_hi)
+ , SUB II32 (OpReg y_lo) (OpReg dst_lo)
+ , SBB II32 (OpReg y_hi) (OpReg dst_hi)
+ ]
+
+genAnd64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genAnd64 dst x y = do
+ expect32BitPlatform (text "genAnd64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ RegCode64 x_code x_hi x_lo <- iselExpr64 x
+ RegCode64 y_code y_hi y_lo <- iselExpr64 y
+ pure $ x_code `appOL` y_code `appOL` toOL
+ [ MOV II32 (OpReg x_lo) (OpReg dst_lo)
+ , MOV II32 (OpReg x_hi) (OpReg dst_hi)
+ , AND II32 (OpReg y_lo) (OpReg dst_lo)
+ , AND II32 (OpReg y_hi) (OpReg dst_hi)
+ ]
+
+genOr64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genOr64 dst x y = do
+ expect32BitPlatform (text "genOr64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ RegCode64 x_code x_hi x_lo <- iselExpr64 x
+ RegCode64 y_code y_hi y_lo <- iselExpr64 y
+ pure $ x_code `appOL` y_code `appOL` toOL
+ [ MOV II32 (OpReg x_lo) (OpReg dst_lo)
+ , MOV II32 (OpReg x_hi) (OpReg dst_hi)
+ , OR II32 (OpReg y_lo) (OpReg dst_lo)
+ , OR II32 (OpReg y_hi) (OpReg dst_hi)
+ ]
+
+genXor64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genXor64 dst x y = do
+ expect32BitPlatform (text "genXor64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ RegCode64 x_code x_hi x_lo <- iselExpr64 x
+ RegCode64 y_code y_hi y_lo <- iselExpr64 y
+ pure $ x_code `appOL` y_code `appOL` toOL
+ [ MOV II32 (OpReg x_lo) (OpReg dst_lo)
+ , MOV II32 (OpReg x_hi) (OpReg dst_hi)
+ , XOR II32 (OpReg y_lo) (OpReg dst_lo)
+ , XOR II32 (OpReg y_hi) (OpReg dst_hi)
+ ]
+
+genNot64 :: LocalReg -> CmmExpr -> NatM InstrBlock
+genNot64 dst src = do
+ expect32BitPlatform (text "genNot64")
+ let Reg64 dst_hi dst_lo = localReg64 dst
+ RegCode64 src_code src_hi src_lo <- iselExpr64 src
+ pure $ src_code `appOL` toOL
+ [ MOV II32 (OpReg src_lo) (OpReg dst_lo)
+ , MOV II32 (OpReg src_hi) (OpReg dst_hi)
+ , NOT II32 (OpReg dst_lo)
+ , NOT II32 (OpReg dst_hi)
+ ]
+
+genEq64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genEq64 dst x y = do
+ expect32BitPlatform (text "genEq64")
+ let dst_r = getLocalRegReg dst
+ RegCode64 x_code x_hi x_lo <- iselExpr64 x
+ RegCode64 y_code y_hi y_lo <- iselExpr64 y
+ Reg64 tmp_hi tmp_lo <- getNewReg64
+ pure $ x_code `appOL` y_code `appOL` toOL
+ [ MOV II32 (OpReg x_lo) (OpReg tmp_lo)
+ , MOV II32 (OpReg x_hi) (OpReg tmp_hi)
+ , XOR II32 (OpReg y_lo) (OpReg tmp_lo)
+ , XOR II32 (OpReg y_hi) (OpReg tmp_hi)
+ , OR II32 (OpReg tmp_lo) (OpReg tmp_hi)
+ , SETCC EQQ (OpReg dst_r)
+ , MOVZxL II8 (OpReg dst_r) (OpReg dst_r)
+ ]
+
+genNe64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genNe64 dst x y = do
+ expect32BitPlatform (text "genNe64")
+ let dst_r = getLocalRegReg dst
+ RegCode64 x_code x_hi x_lo <- iselExpr64 x
+ RegCode64 y_code y_hi y_lo <- iselExpr64 y
+ Reg64 tmp_hi tmp_lo <- getNewReg64
+ pure $ x_code `appOL` y_code `appOL` toOL
+ [ MOV II32 (OpReg x_lo) (OpReg tmp_lo)
+ , MOV II32 (OpReg x_hi) (OpReg tmp_hi)
+ , XOR II32 (OpReg y_lo) (OpReg tmp_lo)
+ , XOR II32 (OpReg y_hi) (OpReg tmp_hi)
+ , OR II32 (OpReg tmp_lo) (OpReg tmp_hi)
+ , SETCC NE (OpReg dst_r)
+ , MOVZxL II8 (OpReg dst_r) (OpReg dst_r)
+ ]
+
+genGtWord64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genGtWord64 dst x y = do
+ expect32BitPlatform (text "genGtWord64")
+ genPred64 LU dst y x
+
+genLtWord64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genLtWord64 dst x y = do
+ expect32BitPlatform (text "genLtWord64")
+ genPred64 LU dst x y
+
+genGeWord64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genGeWord64 dst x y = do
+ expect32BitPlatform (text "genGeWord64")
+ genPred64 GEU dst x y
+
+genLeWord64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genLeWord64 dst x y = do
+ expect32BitPlatform (text "genLeWord64")
+ genPred64 GEU dst y x
+
+genGtInt64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genGtInt64 dst x y = do
+ expect32BitPlatform (text "genGtInt64")
+ genPred64 LTT dst y x
+
+genLtInt64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genLtInt64 dst x y = do
+ expect32BitPlatform (text "genLtInt64")
+ genPred64 LTT dst x y
+
+genGeInt64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genGeInt64 dst x y = do
+ expect32BitPlatform (text "genGeInt64")
+ genPred64 GE dst x y
+
+genLeInt64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genLeInt64 dst x y = do
+ expect32BitPlatform (text "genLeInt64")
+ genPred64 GE dst y x
+
+genPred64 :: Cond -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genPred64 cond dst x y = do
+ -- we can only rely on CF/SF/OF flags!
+ -- Not on ZF, which doesn't take into account the lower parts.
+ massert (cond `elem` [LU,GEU,LTT,GE])
+
+ let dst_r = getLocalRegReg dst
+ RegCode64 x_code x_hi x_lo <- iselExpr64 x
+ RegCode64 y_code y_hi y_lo <- iselExpr64 y
+ -- Basically we perform a subtraction with borrow.
+ -- As we don't need to result, we can use CMP instead of SUB for the low part
+ -- (it sets the borrow flag just like SUB does)
+ pure $ x_code `appOL` y_code `appOL` toOL
+ [ MOV II32 (OpReg x_hi) (OpReg dst_r)
+ , CMP II32 (OpReg y_lo) (OpReg x_lo)
+ , SBB II32 (OpReg y_hi) (OpReg dst_r)
+ , SETCC cond (OpReg dst_r)
+ , MOVZxL II8 (OpReg dst_r) (OpReg dst_r)
+ ]
+
diff --git a/compiler/GHC/CmmToAsm/X86/Cond.hs b/compiler/GHC/CmmToAsm/X86/Cond.hs
index 728a281bce..325160e2bd 100644
--- a/compiler/GHC/CmmToAsm/X86/Cond.hs
+++ b/compiler/GHC/CmmToAsm/X86/Cond.hs
@@ -11,22 +11,22 @@ import GHC.Prelude
data Cond
= ALWAYS -- What's really used? ToDo
- | EQQ -- je/jz -> zf = 1
- | GE -- jge
- | GEU -- ae
- | GTT -- jg
- | GU -- ja
- | LE -- jle
- | LEU -- jbe
- | LTT -- jl
- | LU -- jb
- | NE -- jne
- | NEG -- js
- | POS -- jns
- | CARRY -- jc
- | OFLO -- jo
- | PARITY -- jp
- | NOTPARITY -- jnp
+ | EQQ -- je/jz -> zf=1
+ | GE -- jge -> sf=of
+ | GEU -- ae -> cf=0
+ | GTT -- jg -> zf=0 && sf=of
+ | GU -- ja -> cf=0 && zf=0
+ | LE -- jle -> zf=1 || sf/=of
+ | LEU -- jbe -> cf=1 || zf=1
+ | LTT -- jl -> sf/=of
+ | LU -- jb -> cf=1
+ | NE -- jne -> zf=0
+ | NEG -- js -> sf=1
+ | POS -- jns -> sf=0
+ | CARRY -- jc -> cf=1
+ | OFLO -- jo -> of=1
+ | PARITY -- jp -> pf=1
+ | NOTPARITY -- jnp -> pf=0
deriving Eq
condToUnsigned :: Cond -> Cond