Diffstat (limited to 'compiler/nativeGen/X86/CodeGen.hs')
-rw-r--r--  compiler/nativeGen/X86/CodeGen.hs | 247
1 file changed, 212 insertions, 35 deletions
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 341fa43dbc..a2e26bd68b 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -2,9 +2,7 @@
 -- The default iteration limit is a bit too low for the definitions
 -- in this module.
-#if __GLASGOW_HASKELL__ >= 800
 {-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
-#endif
 
 -----------------------------------------------------------------------------
 --
@@ -32,6 +30,8 @@ where
 #include "../includes/MachDeps.h"
 
 -- NCG stuff:
+import GhcPrelude
+
 import X86.Instr
 import X86.Cond
 import X86.Regs
@@ -65,7 +65,6 @@ import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
 import ForeignCall ( CCallConv(..) )
 import OrdList
 import Outputable
-import Unique
 import FastString
 import DynFlags
 import Util
@@ -211,6 +210,9 @@ stmtToInstrs stmt = do
         -> genCCall dflags is32Bit target result_regs args
 
       CmmBranch id -> genBranch id
+
+      -- We try to arrange blocks such that the likely branch is the fallthrough
+      -- in CmmContFlowOpt. So we can assume the condition is likely false here.
       CmmCondBranch arg true false _ -> do
         b1 <- genCondJump true arg
         b2 <- genBranch false
@@ -295,7 +297,7 @@ data Amode = Amode AddrMode InstrBlock
 
 {-
-Now, given a tree (the argument to an CmmLoad) that references memory,
+Now, given a tree (the argument to a CmmLoad) that references memory,
 produce a suitable addressing mode.
 
 A Rule of the Game (tm) for Amodes: use of the addr bit must
@@ -328,7 +330,7 @@ is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
-    where blockLabel = mkAsmTempLabel (getUnique blockid)
+    where blockLabel = blockLbl blockid
 
 
 -- -----------------------------------------------------------------------------
@@ -466,6 +468,20 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
                      r_dst_lo
             )
 
+iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
+     fn <- getAnyReg expr
+     r_dst_lo <- getNewRegNat II32
+     let r_dst_hi = getHiVRegFromLo r_dst_lo
+         code = fn r_dst_lo
+     return (
+               ChildCode64 (code `snocOL`
+                            MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
+                            CLTD II32 `snocOL`
+                            MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
+                            MOV II32 (OpReg edx) (OpReg r_dst_hi))
+                            r_dst_lo
+            )
+
 iselExpr64 expr
    = pprPanic "iselExpr64(i386)" (ppr expr)
 
@@ -503,6 +519,9 @@ getRegister' dflags is32Bit (CmmReg reg)
 getRegister' dflags is32Bit (CmmRegOff r n)
   = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
 
+getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
+  = addAlignmentCheck align <$> getRegister' dflags is32Bit e
+
 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
 --   TO_W_(x), TO_W_(x >> 32)
 
@@ -731,8 +750,10 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
       MO_F_Ne _ -> condFltReg is32Bit NE  x y
       MO_F_Gt _ -> condFltReg is32Bit GTT x y
       MO_F_Ge _ -> condFltReg is32Bit GE  x y
-      MO_F_Lt _ -> condFltReg is32Bit LTT x y
-      MO_F_Le _ -> condFltReg is32Bit LE  x y
+      -- Invert comparison condition and swap operands
+      -- See Note [SSE Parity Checks]
+      MO_F_Lt _ -> condFltReg is32Bit GTT y x
+      MO_F_Le _ -> condFltReg is32Bit GE  y x
 
      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y
@@ -1255,6 +1276,21 @@ isOperand is32Bit (CmmLit lit)  = is32BitLit is32Bit lit
                                       || isSuitableFloatingPointLit lit
 isOperand _ _            = False
 
+-- | Given a 'Register', produce a new 'Register' with an instruction block
+-- which will check the value for alignment. Used for @-falignment-sanitisation@.
+addAlignmentCheck :: Int -> Register -> Register
+addAlignmentCheck align reg =
+    case reg of
+      Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt reg)
+      Any fmt f          -> Any fmt (\reg -> f reg `appOL` check fmt reg)
+  where
+    check :: Format -> Reg -> InstrBlock
+    check fmt reg =
+        ASSERT(not $ isFloatFormat fmt)
+        toOL [ TEST fmt (OpImm $ ImmInt $ align-1) (OpReg reg)
+             , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
+             ]
+
 memConstant :: Int -> CmmLit -> NatM Amode
 memConstant align lit = do
   lbl <- getNewLabelNat
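
The new addAlignmentCheck relies on the standard power-of-two alignment trick: a value is align-aligned exactly when its low bits are zero, so the emitted TEST against align-1 sets the zero flag for aligned values, and the JXX_GBL NE jumps to the bad-alignment label otherwise. A minimal Haskell sketch of the predicate the TEST/JNE pair encodes; isAligned is a hypothetical helper for illustration only and assumes align is a power of two:

    import Data.Bits ((.&.))
    import Data.Word (Word64)

    -- True exactly when the low bits selected by (align - 1) are all zero,
    -- i.e. when TEST value, (align-1) would set the zero flag.
    isAligned :: Int -> Word64 -> Bool
    isAligned align value = value .&. fromIntegral (align - 1) == 0
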
@@ -1331,15 +1367,17 @@ getCondCode (CmmMachOp mop [x, y])
       MO_F_Ne W32 -> condFltCode NE  x y
       MO_F_Gt W32 -> condFltCode GTT x y
       MO_F_Ge W32 -> condFltCode GE  x y
-      MO_F_Lt W32 -> condFltCode LTT x y
-      MO_F_Le W32 -> condFltCode LE  x y
+      -- Invert comparison condition and swap operands
+      -- See Note [SSE Parity Checks]
+      MO_F_Lt W32 -> condFltCode GTT y x
+      MO_F_Le W32 -> condFltCode GE  y x
 
       MO_F_Eq W64 -> condFltCode EQQ x y
       MO_F_Ne W64 -> condFltCode NE  x y
       MO_F_Gt W64 -> condFltCode GTT x y
       MO_F_Ge W64 -> condFltCode GE  x y
-      MO_F_Lt W64 -> condFltCode LTT x y
-      MO_F_Le W64 -> condFltCode LE  x y
+      MO_F_Lt W64 -> condFltCode GTT y x
+      MO_F_Le W64 -> condFltCode GE  y x
 
       _ -> condIntCode (machOpToCond mop) x y
@@ -1639,11 +1677,19 @@ genCondJump' _ id bool = do
     else do
       lbl <- getBlockIdNat
 
-      -- see comment with condFltReg
+      -- See Note [SSE Parity Checks]
       let code = case cond of
                    NE  -> or_unordered
                    GU  -> plain_test
                    GEU -> plain_test
+                   -- Use ASSERT so we don't break releases if
+                   -- LTT/LE creep in somehow.
+                   LTT -> ASSERT2(False, ppr "Should have been turned into >")
+                          and_ordered
+                   LE  -> ASSERT2(False, ppr "Should have been turned into >=")
+                          and_ordered
                    _   -> and_ordered
 
           plain_test = unitOL (
@@ -1855,6 +1901,72 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
     format = intFormat width
     lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
 
+genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
+         args@[src, mask] = do
+    let platform = targetPlatform dflags
+    if isBmi2Enabled dflags
+        then do code_src  <- getAnyReg src
+                code_mask <- getAnyReg mask
+                src_r     <- getNewRegNat format
+                mask_r    <- getNewRegNat format
+                let dst_r = getRegisterReg platform False (CmmLocal dst)
+                return $ code_src src_r `appOL` code_mask mask_r `appOL`
+                    (if width == W8 then
+                         -- The PDEP instruction doesn't take an r/m8
+                         unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
+                         unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
+                         unitOL (PDEP II16 (OpReg mask_r) (OpReg src_r ) dst_r)
+                     else
+                         unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
+                    (if width == W8 || width == W16 then
+                         -- We used a 16-bit destination register above,
+                         -- so zero-extend
+                         unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
+                     else nilOL)
+        else do
+            targetExpr <- cmmMakeDynamicReference dflags
+                          CallReference lbl
+            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+                                                   [NoHint] [NoHint]
+                                                   CmmMayReturn)
+            genCCall dflags is32Bit target dest_regs args
+  where
+    format = intFormat width
+    lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
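
For reference, PDEP (bit deposit) takes successive low bits of the source and scatters them to the positions of the set bits of the mask; when BMI2 is unavailable, the code above instead emits an out-of-line call to the function named by pdepLabel. A rough Haskell model of the semantics; pdepModel is a hypothetical helper for documentation only, not part of the patch:

    import Data.Bits (testBit, setBit)
    import Data.Word (Word64)

    -- Result bit i is the next unconsumed low bit of src if mask bit i
    -- is set, and zero otherwise.
    pdepModel :: Word64 -> Word64 -> Word64
    pdepModel src mask = go 0 0 0
      where
        go i j acc
          | i >= 64        = acc
          | testBit mask i = go (i+1) (j+1)
                                (if testBit src j then setBit acc i else acc)
          | otherwise      = go (i+1) j acc

PEXT, handled next, is the inverse direction: it gathers the source bits at the mask's set positions into the low bits of the result.
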
+genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
+         args@[src, mask] = do
+    let platform = targetPlatform dflags
+    if isBmi2Enabled dflags
+        then do code_src  <- getAnyReg src
+                code_mask <- getAnyReg mask
+                src_r     <- getNewRegNat format
+                mask_r    <- getNewRegNat format
+                let dst_r = getRegisterReg platform False (CmmLocal dst)
+                return $ code_src src_r `appOL` code_mask mask_r `appOL`
+                    (if width == W8 then
+                         -- The PEXT instruction doesn't take an r/m8
+                         unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
+                         unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
+                         unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r)
+                     else
+                         unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
+                    (if width == W8 || width == W16 then
+                         -- We used a 16-bit destination register above,
+                         -- so zero-extend
+                         unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
+                     else nilOL)
+        else do
+            targetExpr <- cmmMakeDynamicReference dflags
+                          CallReference lbl
+            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+                                                   [NoHint] [NoHint]
+                                                   CmmMayReturn)
+            genCCall dflags is32Bit target dest_regs args
+  where
+    format = intFormat width
+    lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
+
 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
   | is32Bit && width == W64 = do
     -- Fallback to `hs_clz64` on i386
@@ -2129,6 +2241,8 @@ genCCall _ is32Bit target dest_regs args = do
                 ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
             return code
         _ -> panic "genCCall: Wrong number of arguments/results for add2"
+    (PrimTarget (MO_AddWordC width), [res_r, res_c]) ->
+        addSubIntC platform ADD_CC (const Nothing) CARRY width res_r res_c args
     (PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
         addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
     (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
@@ -2645,6 +2759,10 @@ outOfLineCmmOp mop res args
         MO_F32_Tanh  -> fsLit "tanhf"
         MO_F32_Pwr   -> fsLit "powf"
 
+        MO_F32_Asinh -> fsLit "asinhf"
+        MO_F32_Acosh -> fsLit "acoshf"
+        MO_F32_Atanh -> fsLit "atanhf"
+
         MO_F64_Sqrt  -> fsLit "sqrt"
         MO_F64_Fabs  -> fsLit "fabs"
         MO_F64_Sin   -> fsLit "sin"
@@ -2662,15 +2780,23 @@ outOfLineCmmOp mop res args
         MO_F64_Tanh  -> fsLit "tanh"
         MO_F64_Pwr   -> fsLit "pow"
 
+        MO_F64_Asinh -> fsLit "asinh"
+        MO_F64_Acosh -> fsLit "acosh"
+        MO_F64_Atanh -> fsLit "atanh"
+
         MO_Memcpy _  -> fsLit "memcpy"
         MO_Memset _  -> fsLit "memset"
         MO_Memmove _ -> fsLit "memmove"
+        MO_Memcmp _  -> fsLit "memcmp"
 
         MO_PopCnt _  -> fsLit "popcnt"
         MO_BSwap _   -> fsLit "bswap"
         MO_Clz w     -> fsLit $ clzLabel w
         MO_Ctz _     -> unsupported
 
+        MO_Pdep w    -> fsLit $ pdepLabel w
+        MO_Pext w    -> fsLit $ pextLabel w
+
         MO_AtomicRMW _ _ -> fsLit "atomicrmw"
         MO_AtomicRead _  -> fsLit "atomicread"
         MO_AtomicWrite _ -> fsLit "atomicwrite"
@@ -2684,6 +2810,7 @@ outOfLineCmmOp mop res args
         MO_Add2 {}       -> unsupported
         MO_AddIntC {}    -> unsupported
         MO_SubIntC {}    -> unsupported
+        MO_AddWordC {}   -> unsupported
         MO_SubWordC {}   -> unsupported
         MO_U_Mul2 {}     -> unsupported
         MO_WriteBarrier  -> unsupported
@@ -2698,7 +2825,7 @@ outOfLineCmmOp mop res args
 
 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
 genSwitch dflags expr targets
-  | gopt Opt_PIC dflags
+  | positionIndependent dflags
   = do
         (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset)
            -- getNonClobberedReg because it needs to survive across t_code
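
The MO_AddWordC case added earlier in this hunk series reuses addSubIntC with ADD_CC and a CARRY read, i.e. it implements unsigned addition that also returns the carry-out bit. A small reference model of that semantics; addWordC here is a hypothetical helper for illustration, not GHC's primop implementation:

    import Data.Word (Word64)

    -- Unsigned add returning (sum, carry); the sum wraps modulo 2^64,
    -- and carry is 1 exactly when the true sum did not fit in 64 bits.
    addWordC :: Word64 -> Word64 -> (Word64, Word64)
    addWordC x y = (s, if s < x then 1 else 0)
      where s = x + y
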
@@ -2750,23 +2877,29 @@ genSwitch dflags expr targets
             JMP_TBL op ids (Section ReadOnlyData lbl) lbl
             ]
         return code
-  where (offset, ids) = switchTargetsToTable targets
+  where
+    (offset, blockIds) = switchTargetsToTable targets
+    ids = map (fmap DestBlockId) blockIds
 
 generateJumpTableForInstr :: DynFlags -> Instr
                           -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
-    = Just (createJumpTable dflags ids section lbl)
+    = let getBlockId (DestBlockId id) = id
+          getBlockId _ = panic "Non-Label target in Jump Table"
+          blockIds = map (fmap getBlockId) ids
+      in Just (createJumpTable dflags blockIds section lbl)
 generateJumpTableForInstr _ _ = Nothing
 
 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
                 -> GenCmmDecl (Alignment, CmmStatics) h g
 createJumpTable dflags ids section lbl
     = let jumpTable
-            | gopt Opt_PIC dflags =
-              let jumpTableEntryRel Nothing
-                      = CmmStaticLit (CmmInt 0 (wordWidth dflags))
+            | positionIndependent dflags =
+              let ww = wordWidth dflags
+                  jumpTableEntryRel Nothing
+                      = CmmStaticLit (CmmInt 0 ww)
                   jumpTableEntryRel (Just blockid)
-                      = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
-                      where blockLabel = mkAsmTempLabel (getUnique blockid)
+                      = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww)
+                      where blockLabel = blockLbl blockid
               in map jumpTableEntryRel ids
             | otherwise = map (jumpTableEntry dflags) ids
       in CmmData section (1, Statics lbl jumpTable)
@@ -2797,6 +2930,59 @@ condIntReg cond x y = do
     return (Any II32 code)
 
 
+-----------------------------------------------------------
+---          Note [SSE Parity Checks]                    ---
+-----------------------------------------------------------
+
+-- We have to worry about unordered operands (eg. comparisons
+-- against NaN). If the operands are unordered, the comparison
+-- sets the parity flag, carry flag and zero flag.
+-- All comparisons are supposed to return false for unordered
+-- operands except for !=, which returns true.
+--
+-- Optimisation: we don't have to test the parity flag if we
+-- know the test has already excluded the unordered case: eg >
+-- and >= test for a zero carry flag, which can only occur for
+-- ordered operands.
+--
+-- By reversing comparisons we can avoid testing the parity
+-- flag for < and <= as well. If either argument is a NaN we
+-- return false either way. If both arguments are valid then
+-- x <= y  <->  y >= x holds, so it is safe to swap them.
+--
+-- We invert the condition inside getRegister' and getCondCode,
+-- which should cover all invertible cases.
+-- All other functions translating FP comparisons to assembly
+-- use these two to generate the comparison code.
+--
+-- As an example consider a simple check:
+--
+-- func :: Float -> Float -> Int
+-- func x y = if x < y then 1 else 0
+--
+-- which in Cmm gives the floating point comparison
+--
+--  if (%MO_F_Lt_W32(F1, F2)) goto c2gg; else goto c2gf;
+--
+-- We used to compile this to an assembly code block like this:
+--
+-- _c2gh:
+--  ucomiss %xmm2,%xmm1
+--  jp _c2gf
+--  jb _c2gg
+--  jmp _c2gf
+--
+-- where we have to introduce an explicit check for unordered
+-- results (the jp, jump on parity).
+--
+-- We can avoid this by exchanging the arguments and inverting the
+-- direction of the comparison, which results in the sequence
+--
+--  ucomiss %xmm1,%xmm2
+--  ja _c2g2
+--  jmp _c2g1
+--
+-- Removing the jump reduces pressure on the branch prediction
+-- system and plays better with the uop cache.
+
 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
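
The swap in Note [SSE Parity Checks] is sound because every IEEE comparison involving a NaN is false (only != is true), so x < y and y > x agree on all inputs, NaNs included. A quick property one can check in GHCi; nan and checks are throwaway names for illustration:

    nan :: Double
    nan = 0 / 0

    checks :: Bool
    checks = and [ (x < y) == (y > x) | x <- vals, y <- vals ]
      where vals = [nan, -1, 0, 1]
    -- checks evaluates to True
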
@@ -2815,27 +3001,18 @@ condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
     CondCode _ cond cond_code <- condFltCode cond x y
     tmp1 <- getNewRegNat (archWordFormat is32Bit)
     tmp2 <- getNewRegNat (archWordFormat is32Bit)
-    let
-        -- We have to worry about unordered operands (eg. comparisons
-        -- against NaN).  If the operands are unordered, the comparison
-        -- sets the parity flag, carry flag and zero flag.
-        -- All comparisons are supposed to return false for unordered
-        -- operands except for !=, which returns true.
-        --
-        -- Optimisation: we don't have to test the parity flag if we
-        -- know the test has already excluded the unordered case: eg >
-        -- and >= test for a zero carry flag, which can only occur for
-        -- ordered operands.
-        --
-        -- ToDo: by reversing comparisons we could avoid testing the
-        -- parity flag in more cases.
-
+    let -- See Note [SSE Parity Checks]
         code dst =
            cond_code `appOL`
              (case cond of
                 NE  -> or_unordered dst
                 GU  -> plain_test dst
                 GEU -> plain_test dst
+                -- Use ASSERT so we don't break releases if these creep in.
+                LTT -> ASSERT2(False, ppr "Should have been turned into >")
+                       and_ordered dst
+                LE  -> ASSERT2(False, ppr "Should have been turned into >=")
+                       and_ordered dst
                 _   -> and_ordered dst)
 
         plain_test dst = toOL [