diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2022-02-18 12:44:52 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-02-23 13:59:23 -0500 |
commit | bc8de322d310d9dc879d3d3e7e0aa9157d8c2cf5 (patch) | |
tree | b33660f0b9c248d83e2dea1d599f75716c9da549 /compiler | |
parent | 6fa7591e832d71ebea452ee6ddf97ac513404576 (diff) | |
download | haskell-bc8de322d310d9dc879d3d3e7e0aa9157d8c2cf5.tar.gz |
NCG: inline some 64-bit primops on x86/32-bit (#5444)
Several 64-bit operation were implemented with FFI calls on 32-bit
architectures but we can easily implement them with inline assembly
code.
Also remove unused hs_int64ToWord64 and hs_word64ToInt64 C functions.
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/CodeGen.hs | 277 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/Cond.hs | 32 |
2 files changed, 272 insertions, 37 deletions
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs index 74c16383f0..0d67f306dc 100644 --- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs @@ -98,6 +98,12 @@ is32BitPlatform = do platform <- getPlatform return $ target32Bit platform +expect32BitPlatform :: SDoc -> NatM () +expect32BitPlatform doc = do + is32Bit <- is32BitPlatform + when (not is32Bit) $ + pprPanic "Expecting 32-bit platform" doc + sse2Enabled :: NatM Bool sse2Enabled = do config <- getConfig @@ -2165,35 +2171,35 @@ genSimplePrim bid MO_F64_Acosh [dst] [src] = genLibCCall bid genSimplePrim bid MO_F64_Atanh [dst] [src] = genLibCCall bid (fsLit "atanh") [dst] [src] genSimplePrim bid MO_SuspendThread [tok] [rs,i] = genRTSCCall bid (fsLit "suspendThread") [tok] [rs,i] genSimplePrim bid MO_ResumeThread [rs] [tok] = genRTSCCall bid (fsLit "resumeThread") [rs] [tok] -genSimplePrim bid MO_I64_ToI [dst] [src] = genPrimCCall bid (fsLit "hs_int64ToInt") [dst] [src] -genSimplePrim bid MO_I64_FromI [dst] [src] = genPrimCCall bid (fsLit "hs_intToInt64") [dst] [src] -genSimplePrim bid MO_W64_ToW [dst] [src] = genPrimCCall bid (fsLit "hs_word64ToWord") [dst] [src] -genSimplePrim bid MO_W64_FromW [dst] [src] = genPrimCCall bid (fsLit "hs_wordToWord64") [dst] [src] -genSimplePrim bid MO_x64_Neg [dst] [src] = genPrimCCall bid (fsLit "hs_neg64") [dst] [src] -genSimplePrim bid MO_x64_Add [dst] [x,y] = genPrimCCall bid (fsLit "hs_add64") [dst] [x,y] -genSimplePrim bid MO_x64_Sub [dst] [x,y] = genPrimCCall bid (fsLit "hs_sub64") [dst] [x,y] +genSimplePrim _ MO_I64_ToI [dst] [src] = genInt64ToInt dst src +genSimplePrim _ MO_I64_FromI [dst] [src] = genIntToInt64 dst src +genSimplePrim _ MO_W64_ToW [dst] [src] = genWord64ToWord dst src +genSimplePrim _ MO_W64_FromW [dst] [src] = genWordToWord64 dst src +genSimplePrim _ MO_x64_Neg [dst] [src] = genNeg64 dst src +genSimplePrim _ MO_x64_Add [dst] [x,y] = genAdd64 dst x y +genSimplePrim _ MO_x64_Sub [dst] [x,y] = genSub64 dst x y genSimplePrim bid MO_x64_Mul [dst] [x,y] = genPrimCCall bid (fsLit "hs_mul64") [dst] [x,y] genSimplePrim bid MO_I64_Quot [dst] [x,y] = genPrimCCall bid (fsLit "hs_quotInt64") [dst] [x,y] genSimplePrim bid MO_I64_Rem [dst] [x,y] = genPrimCCall bid (fsLit "hs_remInt64") [dst] [x,y] genSimplePrim bid MO_W64_Quot [dst] [x,y] = genPrimCCall bid (fsLit "hs_quotWord64") [dst] [x,y] genSimplePrim bid MO_W64_Rem [dst] [x,y] = genPrimCCall bid (fsLit "hs_remWord64") [dst] [x,y] -genSimplePrim bid MO_x64_And [dst] [x,y] = genPrimCCall bid (fsLit "hs_and64") [dst] [x,y] -genSimplePrim bid MO_x64_Or [dst] [x,y] = genPrimCCall bid (fsLit "hs_or64") [dst] [x,y] -genSimplePrim bid MO_x64_Xor [dst] [x,y] = genPrimCCall bid (fsLit "hs_xor64") [dst] [x,y] -genSimplePrim bid MO_x64_Not [dst] [src] = genPrimCCall bid (fsLit "hs_not64") [dst] [src] +genSimplePrim _ MO_x64_And [dst] [x,y] = genAnd64 dst x y +genSimplePrim _ MO_x64_Or [dst] [x,y] = genOr64 dst x y +genSimplePrim _ MO_x64_Xor [dst] [x,y] = genXor64 dst x y +genSimplePrim _ MO_x64_Not [dst] [src] = genNot64 dst src genSimplePrim bid MO_x64_Shl [dst] [x,n] = genPrimCCall bid (fsLit "hs_uncheckedShiftL64") [dst] [x,n] genSimplePrim bid MO_I64_Shr [dst] [x,n] = genPrimCCall bid (fsLit "hs_uncheckedIShiftRA64") [dst] [x,n] genSimplePrim bid MO_W64_Shr [dst] [x,n] = genPrimCCall bid (fsLit "hs_uncheckedShiftRL64") [dst] [x,n] -genSimplePrim bid MO_x64_Eq [dst] [x,y] = genPrimCCall bid (fsLit "hs_eq64") [dst] [x,y] -genSimplePrim bid MO_x64_Ne [dst] [x,y] = genPrimCCall bid (fsLit "hs_ne64") [dst] [x,y] -genSimplePrim bid MO_I64_Ge [dst] [x,y] = genPrimCCall bid (fsLit "hs_geInt64") [dst] [x,y] -genSimplePrim bid MO_I64_Gt [dst] [x,y] = genPrimCCall bid (fsLit "hs_gtInt64") [dst] [x,y] -genSimplePrim bid MO_I64_Le [dst] [x,y] = genPrimCCall bid (fsLit "hs_leInt64") [dst] [x,y] -genSimplePrim bid MO_I64_Lt [dst] [x,y] = genPrimCCall bid (fsLit "hs_ltInt64") [dst] [x,y] -genSimplePrim bid MO_W64_Ge [dst] [x,y] = genPrimCCall bid (fsLit "hs_geWord64") [dst] [x,y] -genSimplePrim bid MO_W64_Gt [dst] [x,y] = genPrimCCall bid (fsLit "hs_gtWord64") [dst] [x,y] -genSimplePrim bid MO_W64_Le [dst] [x,y] = genPrimCCall bid (fsLit "hs_leWord64") [dst] [x,y] -genSimplePrim bid MO_W64_Lt [dst] [x,y] = genPrimCCall bid (fsLit "hs_ltWord64") [dst] [x,y] +genSimplePrim _ MO_x64_Eq [dst] [x,y] = genEq64 dst x y +genSimplePrim _ MO_x64_Ne [dst] [x,y] = genNe64 dst x y +genSimplePrim _ MO_I64_Ge [dst] [x,y] = genGeInt64 dst x y +genSimplePrim _ MO_I64_Gt [dst] [x,y] = genGtInt64 dst x y +genSimplePrim _ MO_I64_Le [dst] [x,y] = genLeInt64 dst x y +genSimplePrim _ MO_I64_Lt [dst] [x,y] = genLtInt64 dst x y +genSimplePrim _ MO_W64_Ge [dst] [x,y] = genGeWord64 dst x y +genSimplePrim _ MO_W64_Gt [dst] [x,y] = genGtWord64 dst x y +genSimplePrim _ MO_W64_Le [dst] [x,y] = genLeWord64 dst x y +genSimplePrim _ MO_W64_Lt [dst] [x,y] = genLtWord64 dst x y genSimplePrim _ op dst args = do platform <- ncgPlatform <$> getConfig pprPanic "genSimplePrim: unhandled primop" (ppr (pprCallishMachOp op, dst, fmap (pdoc platform) args)) @@ -4063,3 +4069,232 @@ genQuotRem width signed res_q res_r m_arg_x_high arg_x_low arg_y = do toOL [instr format y_reg, MOV format (OpReg rax) (OpReg reg_q), MOV format (OpReg rdx) (OpReg reg_r)] + + +---------------------------------------------------------------------------- +-- The following functions implement certain 64-bit MachOps inline for 32-bit +-- architectures. On 64-bit architectures, those MachOps aren't supported and +-- calling these functions for a 64-bit target platform is considered an error +-- (hence the use of `expect32BitPlatform`). +-- +-- On 64-bit platforms, generic MachOps should be used instead of these 64-bit +-- specific ones (e.g. use MO_Add instead of MO_x64_Add). This MachOp selection +-- is done by StgToCmm. + +genInt64ToInt :: LocalReg -> CmmExpr -> NatM InstrBlock +genInt64ToInt dst src = do + expect32BitPlatform (text "genInt64ToInt") + RegCode64 code _src_hi src_lo <- iselExpr64 src + let dst_r = getLocalRegReg dst + pure $ code `snocOL` MOV II32 (OpReg src_lo) (OpReg dst_r) + +genWord64ToWord :: LocalReg -> CmmExpr -> NatM InstrBlock +genWord64ToWord dst src = do + expect32BitPlatform (text "genWord64ToWord") + RegCode64 code _src_hi src_lo <- iselExpr64 src + let dst_r = getLocalRegReg dst + pure $ code `snocOL` MOV II32 (OpReg src_lo) (OpReg dst_r) + +genIntToInt64 :: LocalReg -> CmmExpr -> NatM InstrBlock +genIntToInt64 dst src = do + expect32BitPlatform (text "genIntToInt64") + let Reg64 dst_hi dst_lo = localReg64 dst + src_code <- getAnyReg src + pure $ src_code rax `appOL` toOL + [ CLTD II32 -- sign extend EAX in EDX:EAX + , MOV II32 (OpReg rax) (OpReg dst_lo) + , MOV II32 (OpReg rdx) (OpReg dst_hi) + ] + +genWordToWord64 :: LocalReg -> CmmExpr -> NatM InstrBlock +genWordToWord64 dst src = do + expect32BitPlatform (text "genWordToWord64") + let Reg64 dst_hi dst_lo = localReg64 dst + src_code <- getAnyReg src + pure $ src_code dst_lo + `snocOL` XOR II32 (OpReg dst_hi) (OpReg dst_hi) + +genNeg64 :: LocalReg -> CmmExpr -> NatM InstrBlock +genNeg64 dst src = do + expect32BitPlatform (text "genNeg64") + let Reg64 dst_hi dst_lo = localReg64 dst + RegCode64 code src_hi src_lo <- iselExpr64 src + pure $ code `appOL` toOL + [ MOV II32 (OpReg src_lo) (OpReg dst_lo) + , MOV II32 (OpReg src_hi) (OpReg dst_hi) + , NEGI II32 (OpReg dst_lo) + , ADC II32 (OpImm (ImmInt 0)) (OpReg dst_hi) + , NEGI II32 (OpReg dst_hi) + ] + +genAdd64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genAdd64 dst x y = do + expect32BitPlatform (text "genAdd64") + let Reg64 dst_hi dst_lo = localReg64 dst + RegCode64 x_code x_hi x_lo <- iselExpr64 x + RegCode64 y_code y_hi y_lo <- iselExpr64 y + pure $ x_code `appOL` y_code `appOL` toOL + [ MOV II32 (OpReg x_lo) (OpReg dst_lo) + , MOV II32 (OpReg x_hi) (OpReg dst_hi) + , ADD II32 (OpReg y_lo) (OpReg dst_lo) + , ADC II32 (OpReg y_hi) (OpReg dst_hi) + ] + +genSub64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genSub64 dst x y = do + expect32BitPlatform (text "genSub64") + let Reg64 dst_hi dst_lo = localReg64 dst + RegCode64 x_code x_hi x_lo <- iselExpr64 x + RegCode64 y_code y_hi y_lo <- iselExpr64 y + pure $ x_code `appOL` y_code `appOL` toOL + [ MOV II32 (OpReg x_lo) (OpReg dst_lo) + , MOV II32 (OpReg x_hi) (OpReg dst_hi) + , SUB II32 (OpReg y_lo) (OpReg dst_lo) + , SBB II32 (OpReg y_hi) (OpReg dst_hi) + ] + +genAnd64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genAnd64 dst x y = do + expect32BitPlatform (text "genAnd64") + let Reg64 dst_hi dst_lo = localReg64 dst + RegCode64 x_code x_hi x_lo <- iselExpr64 x + RegCode64 y_code y_hi y_lo <- iselExpr64 y + pure $ x_code `appOL` y_code `appOL` toOL + [ MOV II32 (OpReg x_lo) (OpReg dst_lo) + , MOV II32 (OpReg x_hi) (OpReg dst_hi) + , AND II32 (OpReg y_lo) (OpReg dst_lo) + , AND II32 (OpReg y_hi) (OpReg dst_hi) + ] + +genOr64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genOr64 dst x y = do + expect32BitPlatform (text "genOr64") + let Reg64 dst_hi dst_lo = localReg64 dst + RegCode64 x_code x_hi x_lo <- iselExpr64 x + RegCode64 y_code y_hi y_lo <- iselExpr64 y + pure $ x_code `appOL` y_code `appOL` toOL + [ MOV II32 (OpReg x_lo) (OpReg dst_lo) + , MOV II32 (OpReg x_hi) (OpReg dst_hi) + , OR II32 (OpReg y_lo) (OpReg dst_lo) + , OR II32 (OpReg y_hi) (OpReg dst_hi) + ] + +genXor64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genXor64 dst x y = do + expect32BitPlatform (text "genXor64") + let Reg64 dst_hi dst_lo = localReg64 dst + RegCode64 x_code x_hi x_lo <- iselExpr64 x + RegCode64 y_code y_hi y_lo <- iselExpr64 y + pure $ x_code `appOL` y_code `appOL` toOL + [ MOV II32 (OpReg x_lo) (OpReg dst_lo) + , MOV II32 (OpReg x_hi) (OpReg dst_hi) + , XOR II32 (OpReg y_lo) (OpReg dst_lo) + , XOR II32 (OpReg y_hi) (OpReg dst_hi) + ] + +genNot64 :: LocalReg -> CmmExpr -> NatM InstrBlock +genNot64 dst src = do + expect32BitPlatform (text "genNot64") + let Reg64 dst_hi dst_lo = localReg64 dst + RegCode64 src_code src_hi src_lo <- iselExpr64 src + pure $ src_code `appOL` toOL + [ MOV II32 (OpReg src_lo) (OpReg dst_lo) + , MOV II32 (OpReg src_hi) (OpReg dst_hi) + , NOT II32 (OpReg dst_lo) + , NOT II32 (OpReg dst_hi) + ] + +genEq64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genEq64 dst x y = do + expect32BitPlatform (text "genEq64") + let dst_r = getLocalRegReg dst + RegCode64 x_code x_hi x_lo <- iselExpr64 x + RegCode64 y_code y_hi y_lo <- iselExpr64 y + Reg64 tmp_hi tmp_lo <- getNewReg64 + pure $ x_code `appOL` y_code `appOL` toOL + [ MOV II32 (OpReg x_lo) (OpReg tmp_lo) + , MOV II32 (OpReg x_hi) (OpReg tmp_hi) + , XOR II32 (OpReg y_lo) (OpReg tmp_lo) + , XOR II32 (OpReg y_hi) (OpReg tmp_hi) + , OR II32 (OpReg tmp_lo) (OpReg tmp_hi) + , SETCC EQQ (OpReg dst_r) + , MOVZxL II8 (OpReg dst_r) (OpReg dst_r) + ] + +genNe64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genNe64 dst x y = do + expect32BitPlatform (text "genNe64") + let dst_r = getLocalRegReg dst + RegCode64 x_code x_hi x_lo <- iselExpr64 x + RegCode64 y_code y_hi y_lo <- iselExpr64 y + Reg64 tmp_hi tmp_lo <- getNewReg64 + pure $ x_code `appOL` y_code `appOL` toOL + [ MOV II32 (OpReg x_lo) (OpReg tmp_lo) + , MOV II32 (OpReg x_hi) (OpReg tmp_hi) + , XOR II32 (OpReg y_lo) (OpReg tmp_lo) + , XOR II32 (OpReg y_hi) (OpReg tmp_hi) + , OR II32 (OpReg tmp_lo) (OpReg tmp_hi) + , SETCC NE (OpReg dst_r) + , MOVZxL II8 (OpReg dst_r) (OpReg dst_r) + ] + +genGtWord64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genGtWord64 dst x y = do + expect32BitPlatform (text "genGtWord64") + genPred64 LU dst y x + +genLtWord64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genLtWord64 dst x y = do + expect32BitPlatform (text "genLtWord64") + genPred64 LU dst x y + +genGeWord64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genGeWord64 dst x y = do + expect32BitPlatform (text "genGeWord64") + genPred64 GEU dst x y + +genLeWord64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genLeWord64 dst x y = do + expect32BitPlatform (text "genLeWord64") + genPred64 GEU dst y x + +genGtInt64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genGtInt64 dst x y = do + expect32BitPlatform (text "genGtInt64") + genPred64 LTT dst y x + +genLtInt64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genLtInt64 dst x y = do + expect32BitPlatform (text "genLtInt64") + genPred64 LTT dst x y + +genGeInt64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genGeInt64 dst x y = do + expect32BitPlatform (text "genGeInt64") + genPred64 GE dst x y + +genLeInt64 :: LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genLeInt64 dst x y = do + expect32BitPlatform (text "genLeInt64") + genPred64 GE dst y x + +genPred64 :: Cond -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock +genPred64 cond dst x y = do + -- we can only rely on CF/SF/OF flags! + -- Not on ZF, which doesn't take into account the lower parts. + massert (cond `elem` [LU,GEU,LTT,GE]) + + let dst_r = getLocalRegReg dst + RegCode64 x_code x_hi x_lo <- iselExpr64 x + RegCode64 y_code y_hi y_lo <- iselExpr64 y + -- Basically we perform a subtraction with borrow. + -- As we don't need to result, we can use CMP instead of SUB for the low part + -- (it sets the borrow flag just like SUB does) + pure $ x_code `appOL` y_code `appOL` toOL + [ MOV II32 (OpReg x_hi) (OpReg dst_r) + , CMP II32 (OpReg y_lo) (OpReg x_lo) + , SBB II32 (OpReg y_hi) (OpReg dst_r) + , SETCC cond (OpReg dst_r) + , MOVZxL II8 (OpReg dst_r) (OpReg dst_r) + ] + diff --git a/compiler/GHC/CmmToAsm/X86/Cond.hs b/compiler/GHC/CmmToAsm/X86/Cond.hs index 728a281bce..325160e2bd 100644 --- a/compiler/GHC/CmmToAsm/X86/Cond.hs +++ b/compiler/GHC/CmmToAsm/X86/Cond.hs @@ -11,22 +11,22 @@ import GHC.Prelude data Cond = ALWAYS -- What's really used? ToDo - | EQQ -- je/jz -> zf = 1 - | GE -- jge - | GEU -- ae - | GTT -- jg - | GU -- ja - | LE -- jle - | LEU -- jbe - | LTT -- jl - | LU -- jb - | NE -- jne - | NEG -- js - | POS -- jns - | CARRY -- jc - | OFLO -- jo - | PARITY -- jp - | NOTPARITY -- jnp + | EQQ -- je/jz -> zf=1 + | GE -- jge -> sf=of + | GEU -- ae -> cf=0 + | GTT -- jg -> zf=0 && sf=of + | GU -- ja -> cf=0 && zf=0 + | LE -- jle -> zf=1 || sf/=of + | LEU -- jbe -> cf=1 || zf=1 + | LTT -- jl -> sf/=of + | LU -- jb -> cf=1 + | NE -- jne -> zf=0 + | NEG -- js -> sf=1 + | POS -- jns -> sf=0 + | CARRY -- jc -> cf=1 + | OFLO -- jo -> of=1 + | PARITY -- jp -> pf=1 + | NOTPARITY -- jnp -> pf=0 deriving Eq condToUnsigned :: Cond -> Cond |