diff options
Diffstat (limited to 'compiler/nativeGen')
63 files changed, 1073 insertions, 317 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.hs b/compiler/nativeGen/AsmCodeGen.hs index 45d170e28d..79c3440ff6 100644 --- a/compiler/nativeGen/AsmCodeGen.hs +++ b/compiler/nativeGen/AsmCodeGen.hs @@ -25,6 +25,8 @@ module AsmCodeGen ( #include "nativeGen/NCG.h" +import GhcPrelude + import qualified X86.CodeGen import qualified X86.Regs import qualified X86.Instr @@ -363,7 +365,7 @@ finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs -- build the global register conflict graph let graphGlobal - = foldl Color.union Color.initGraph + = foldl' Color.union Color.initGraph $ [ Color.raGraph stat | stat@Color.RegAllocStatsStart{} <- stats] @@ -927,16 +929,18 @@ generateJumpTables ncgImpl xs = concatMap f xs shortcutBranches :: DynFlags - -> NcgImpl statics instr jumpDest + -> NcgImpl statics instr jumpDest -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] shortcutBranches dflags ncgImpl tops - | optLevel dflags < 1 = tops -- only with -O or higher - | otherwise = map (apply_mapping ncgImpl mapping) tops' + | gopt Opt_AsmShortcutting dflags + = map (apply_mapping ncgImpl mapping) tops' + | otherwise + = tops where (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops - mapping = foldr plusUFM emptyUFM mappings + mapping = plusUFMList mappings build_mapping :: NcgImpl statics instr jumpDest -> GenCmmDecl d (LabelMap t) (ListGraph instr) @@ -953,7 +957,7 @@ build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks))) -- shorted. -- Don't completely eliminate loops here -- that can leave a dangling jump! (_, shortcut_blocks, others) = - foldl split (setEmpty :: LabelSet, [], []) blocks + foldl' split (setEmpty :: LabelSet, [], []) blocks split (s, shortcut_blocks, others) b@(BasicBlock id [insn]) | Just jd <- canShortcut ncgImpl insn, Just dest <- getJumpDestBlockId ncgImpl jd, @@ -970,7 +974,7 @@ build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks))) has_info l = mapMember l info -- build a mapping from BlockId to JumpDest for shorting branches - mapping = foldl add emptyUFM shortcut_blocks + mapping = foldl' add emptyUFM shortcut_blocks add ufm (id,dest) = addToUFM ufm id dest apply_mapping :: NcgImpl statics instr jumpDest @@ -1212,15 +1216,15 @@ cmmExprNative referenceKind expr = do -- to use the register table, so we replace these registers -- with the corresponding labels: CmmReg (CmmGlobal EagerBlackholeInfo) - | arch == ArchPPC && not (gopt Opt_PIC dflags) + | arch == ArchPPC && not (positionIndependent dflags) -> cmmExprNative referenceKind $ CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info"))) CmmReg (CmmGlobal GCEnter1) - | arch == ArchPPC && not (gopt Opt_PIC dflags) + | arch == ArchPPC && not (positionIndependent dflags) -> cmmExprNative referenceKind $ CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1"))) CmmReg (CmmGlobal GCFun) - | arch == ArchPPC && not (gopt Opt_PIC dflags) + | arch == ArchPPC && not (positionIndependent dflags) -> cmmExprNative referenceKind $ CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun"))) diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs index c52fe10b13..399d646000 100644 --- a/compiler/nativeGen/CPrim.hs +++ b/compiler/nativeGen/CPrim.hs @@ -5,12 +5,16 @@ module CPrim , atomicRMWLabel , cmpxchgLabel , popCntLabel + , pdepLabel + , pextLabel , bSwapLabel , clzLabel , ctzLabel , word2FloatLabel ) where +import GhcPrelude + import CmmType import CmmMachOp import Outputable @@ -24,6 +28,24 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w pprWidth W64 = "64" pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w) +pdepLabel :: Width -> String +pdepLabel w = "hs_pdep" ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "pdepLabel: Unsupported word width " (ppr w) + +pextLabel :: Width -> String +pextLabel w = "hs_pext" ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "pextLabel: Unsupported word width " (ppr w) + bSwapLabel :: Width -> String bSwapLabel w = "hs_bswap" ++ pprWidth w where diff --git a/compiler/nativeGen/Dwarf.hs b/compiler/nativeGen/Dwarf.hs index afeac030fd..0e645a2a56 100644 --- a/compiler/nativeGen/Dwarf.hs +++ b/compiler/nativeGen/Dwarf.hs @@ -2,6 +2,8 @@ module Dwarf ( dwarfGen ) where +import GhcPrelude + import CLabel import CmmExpr ( GlobalReg(..) ) import Config ( cProjectName, cProjectVersion ) @@ -147,7 +149,7 @@ debugSplitProcs b = concat $ H.mapElems $ mergeMaps $ map (split Nothing) b {- Note [Splitting DebugBlocks] -DWARF requires that we break up the the nested DebugBlocks produced from +DWARF requires that we break up the nested DebugBlocks produced from the C-- AST. For instance, we begin with tick trees containing nested procs. For example, @@ -180,10 +182,17 @@ procToDwarf df prc _otherwise -> showSDocDump df $ ppr $ dblLabel prc , dwLabel = dblCLabel prc , dwParent = fmap mkAsmTempDieLabel - $ mfilter (/= dblCLabel prc) + $ mfilter goodParent $ fmap dblCLabel (dblParent prc) - -- Omit parent if it would be self-referential } + where + goodParent a | a == dblCLabel prc = False + -- Omit parent if it would be self-referential + goodParent a | not (externallyVisibleCLabel a) + , debugLevel df < 2 = False + -- We strip block information when running -g0 or -g1, don't + -- refer to blocks in that case. Fixes #14894. + goodParent _ = True -- | Generate DWARF info for a block blockToDwarf :: DynFlags -> DebugBlock -> DwarfInfo diff --git a/compiler/nativeGen/Dwarf/Constants.hs b/compiler/nativeGen/Dwarf/Constants.hs index a8034ef295..db5395af35 100644 --- a/compiler/nativeGen/Dwarf/Constants.hs +++ b/compiler/nativeGen/Dwarf/Constants.hs @@ -3,6 +3,8 @@ module Dwarf.Constants where +import GhcPrelude + import AsmUtils import FastString import Platform diff --git a/compiler/nativeGen/Dwarf/Types.hs b/compiler/nativeGen/Dwarf/Types.hs index 3c4501f613..25629448dd 100644 --- a/compiler/nativeGen/Dwarf/Types.hs +++ b/compiler/nativeGen/Dwarf/Types.hs @@ -22,6 +22,8 @@ module Dwarf.Types ) where +import GhcPrelude + import Debug import CLabel import CmmExpr ( GlobalReg(..) ) @@ -344,7 +346,7 @@ pprFrameProc frameLbl initUw (DwarfFrameProc procLbl hasInfo blocks) procEnd = mkAsmTempEndLabel procLbl ifInfo str = if hasInfo then text str else empty -- see [Note: Info Offset] - in vcat [ ifPprDebug $ text "# Unwinding for" <+> ppr procLbl <> colon + in vcat [ whenPprDebug $ text "# Unwinding for" <+> ppr procLbl <> colon , pprData4' (ppr fdeEndLabel <> char '-' <> ppr fdeLabel) , ppr fdeLabel <> colon , pprData4' (ppr frameLbl <> char '-' <> @@ -413,6 +415,12 @@ pprFrameBlock (DwarfFrameBlock hasInfo uws0) = -- Note that this will not prevent GDB from failing to look-up the -- correct function name for the frame, as that uses the symbol table, -- which we can not manipulate as easily. +-- +-- There's a GDB patch to address this at [1]. At the moment of writing +-- it's not merged, so I recommend building GDB with the patch if you +-- care about unwinding. The hack above doesn't cover every case. +-- +-- [1] https://sourceware.org/ml/gdb-patches/2018-02/msg00055.html -- | Get DWARF register ID for a given GlobalReg dwarfGlobalRegNo :: Platform -> GlobalReg -> Word8 @@ -490,9 +498,11 @@ pprUnwindExpr spIsCFA expr pprE (UwPlus u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_plus pprE (UwMinus u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_minus pprE (UwTimes u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_mul - in text "\t.uleb128 1f-.-1" $$ -- DW_FORM_block length + in text "\t.uleb128 2f-1f" $$ -- DW_FORM_block length + -- computed as the difference of the following local labels 2: and 1: + text "1:" $$ pprE expr $$ - text "1:" + text "2:" -- | Generate code for re-setting the unwind information for a -- register to @undefined@ diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs index 00811f1375..82ecbecc14 100644 --- a/compiler/nativeGen/Format.hs +++ b/compiler/nativeGen/Format.hs @@ -20,6 +20,8 @@ module Format ( where +import GhcPrelude + import Cmm import Outputable @@ -68,6 +70,7 @@ floatFormat width = case width of W32 -> FF32 W64 -> FF64 + W80 -> FF80 other -> pprPanic "Format.floatFormat" (ppr other) diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs index 515d4f3d85..0bd99fbee8 100644 --- a/compiler/nativeGen/Instruction.hs +++ b/compiler/nativeGen/Instruction.hs @@ -14,6 +14,8 @@ module Instruction ( where +import GhcPrelude + import Reg import BlockId @@ -189,14 +191,12 @@ class Instruction instr where -- Subtract an amount from the C stack pointer mkStackAllocInstr - :: Platform -- TODO: remove (needed by x86/x86_64 - -- because they share an Instr type) + :: Platform -> Int - -> instr + -> [instr] -- Add an amount to the C stack pointer mkStackDeallocInstr - :: Platform -- TODO: remove (needed by x86/x86_64 - -- because they share an Instr type) + :: Platform -> Int - -> instr + -> [instr] diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs index 6af0df5b01..b9532e17b5 100644 --- a/compiler/nativeGen/NCGMonad.hs +++ b/compiler/nativeGen/NCGMonad.hs @@ -37,6 +37,8 @@ where #include "HsVersions.h" +import GhcPrelude + import Reg import Format import TargetReg @@ -44,7 +46,7 @@ import TargetReg import BlockId import Hoopl.Collections import Hoopl.Label -import CLabel ( CLabel, mkAsmTempLabel ) +import CLabel ( CLabel ) import Debug import FastString ( FastString ) import UniqFM @@ -158,8 +160,7 @@ getBlockIdNat getNewLabelNat :: NatM CLabel getNewLabelNat - = do u <- getUniqueNat - return (mkAsmTempLabel u) + = blockLbl <$> getBlockIdNat getNewRegNat :: Format -> NatM Reg diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs index bef0a21235..2f300c4614 100644 --- a/compiler/nativeGen/PIC.hs +++ b/compiler/nativeGen/PIC.hs @@ -47,6 +47,8 @@ module PIC ( where +import GhcPrelude + import qualified PPC.Instr as PPC import qualified PPC.Regs as PPC @@ -162,7 +164,7 @@ cmmMakePicReference dflags lbl | OSAIX <- platformOS $ targetPlatform dflags = CmmMachOp (MO_Add W32) [ CmmReg (CmmGlobal PicBaseReg) - , CmmLit $ picRelative + , CmmLit $ picRelative dflags (platformArch $ targetPlatform dflags) (platformOS $ targetPlatform dflags) lbl ] @@ -171,15 +173,16 @@ cmmMakePicReference dflags lbl | ArchPPC_64 _ <- platformArch $ targetPlatform dflags = CmmMachOp (MO_Add W32) -- code model medium [ CmmReg (CmmGlobal PicBaseReg) - , CmmLit $ picRelative + , CmmLit $ picRelative dflags (platformArch $ targetPlatform dflags) (platformOS $ targetPlatform dflags) lbl ] - | (gopt Opt_PIC dflags || WayDyn `elem` ways dflags) && absoluteLabel lbl + | (positionIndependent dflags || gopt Opt_ExternalDynamicRefs dflags) + && absoluteLabel lbl = CmmMachOp (MO_Add (wordWidth dflags)) [ CmmReg (CmmGlobal PicBaseReg) - , CmmLit $ picRelative + , CmmLit $ picRelative dflags (platformArch $ targetPlatform dflags) (platformOS $ targetPlatform dflags) lbl ] @@ -236,7 +239,7 @@ howToAccessLabel howToAccessLabel dflags _ OSMinGW32 this_mod _ lbl -- Assume all symbols will be in the same PE, so just access them directly. - | WayDyn `notElem` ways dflags + | not (gopt Opt_ExternalDynamicRefs dflags) = AccessDirectly -- If the target symbol is in another PE we need to access it via the @@ -272,7 +275,7 @@ howToAccessLabel dflags arch OSDarwin this_mod DataReference lbl -- we'd need to pass the current Module all the way in to -- this function. | arch /= ArchX86_64 - , gopt Opt_PIC dflags && externallyVisibleCLabel lbl + , positionIndependent dflags && externallyVisibleCLabel lbl = AccessViaSymbolPtr | otherwise @@ -313,8 +316,8 @@ howToAccessLabel _dflags _arch OSAIX _this_mod kind _lbl -- -- ELF tries to pretend to the main application code that dynamic linking does -- not exist. While this may sound convenient, it tends to mess things up in --- very bad ways, so we have to be careful when we generate code for the main --- program (-dynamic but no -fPIC). +-- very bad ways, so we have to be careful when we generate code for a non-PIE +-- main program (-dynamic but no -fPIC). -- -- Indirect access is required for references to imported symbols -- from position independent code. It is also required from the main program @@ -337,7 +340,8 @@ howToAccessLabel dflags _ os _ _ _ -- if we don't dynamically link to Haskell code, -- it actually manages to do so without messing things up. | osElfTarget os - , not (gopt Opt_PIC dflags) && WayDyn `notElem` ways dflags + , not (positionIndependent dflags) && + not (gopt Opt_ExternalDynamicRefs dflags) = AccessDirectly howToAccessLabel dflags arch os this_mod DataReference lbl @@ -351,7 +355,7 @@ howToAccessLabel dflags arch os this_mod DataReference lbl -- via a symbol pointer (see below for an explanation why -- PowerPC32 Linux is especially broken). | arch == ArchPPC - , gopt Opt_PIC dflags + , positionIndependent dflags -> AccessViaSymbolPtr | otherwise @@ -372,12 +376,13 @@ howToAccessLabel dflags arch os this_mod DataReference lbl howToAccessLabel dflags arch os this_mod CallReference lbl | osElfTarget os - , labelDynamic dflags this_mod lbl && not (gopt Opt_PIC dflags) + , labelDynamic dflags this_mod lbl && not (positionIndependent dflags) = AccessDirectly | osElfTarget os , arch /= ArchX86 - , labelDynamic dflags this_mod lbl && gopt Opt_PIC dflags + , labelDynamic dflags this_mod lbl + , positionIndependent dflags = AccessViaStub howToAccessLabel dflags _ os this_mod _ lbl @@ -388,7 +393,7 @@ howToAccessLabel dflags _ os this_mod _ lbl -- all other platforms howToAccessLabel dflags _ _ _ _ _ - | not (gopt Opt_PIC dflags) + | not (positionIndependent dflags) = AccessDirectly | otherwise @@ -397,10 +402,10 @@ howToAccessLabel dflags _ _ _ _ _ -- ------------------------------------------------------------------- --- | Says what we we have to add to our 'PIC base register' in order to +-- | Says what we have to add to our 'PIC base register' in order to -- get the address of a label. -picRelative :: Arch -> OS -> CLabel -> CmmLit +picRelative :: DynFlags -> Arch -> OS -> CLabel -> CmmLit -- Darwin, but not x86_64: -- The PIC base register points to the PIC base label at the beginning @@ -409,15 +414,15 @@ picRelative :: Arch -> OS -> CLabel -> CmmLit -- We have already made sure that all labels that are not from the current -- module are accessed indirectly ('as' can't calculate differences between -- undefined labels). -picRelative arch OSDarwin lbl +picRelative dflags arch OSDarwin lbl | arch /= ArchX86_64 - = CmmLabelDiffOff lbl mkPicBaseLabel 0 + = CmmLabelDiffOff lbl mkPicBaseLabel 0 (wordWidth dflags) -- On AIX we use an indirect local TOC anchored by 'gotLabel'. -- This way we use up only one global TOC entry per compilation-unit -- (this is quite similiar to GCC's @-mminimal-toc@ compilation mode) -picRelative _ OSAIX lbl - = CmmLabelDiffOff lbl gotLabel 0 +picRelative dflags _ OSAIX lbl + = CmmLabelDiffOff lbl gotLabel 0 (wordWidth dflags) -- PowerPC Linux: -- The PIC base register points to our fake GOT. Use a label difference @@ -425,9 +430,9 @@ picRelative _ OSAIX lbl -- We have made sure that *everything* is accessed indirectly, so this -- is only used for offsets from the GOT to symbol pointers inside the -- GOT. -picRelative ArchPPC os lbl +picRelative dflags ArchPPC os lbl | osElfTarget os - = CmmLabelDiffOff lbl gotLabel 0 + = CmmLabelDiffOff lbl gotLabel 0 (wordWidth dflags) -- Most Linux versions: @@ -437,7 +442,7 @@ picRelative ArchPPC os lbl -- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers, -- and a GotSymbolOffset label for other things. -- For reasons of tradition, the symbol offset label is written as a plain label. -picRelative arch os lbl +picRelative _ arch os lbl | osElfTarget os || (os == OSDarwin && arch == ArchX86_64) = let result | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl @@ -448,7 +453,7 @@ picRelative arch os lbl in result -picRelative _ _ _ +picRelative _ _ _ _ = panic "PositionIndependentCode.picRelative undefined for this platform" @@ -467,7 +472,7 @@ needImportedSymbols dflags arch os -- PowerPC Linux: -fPIC or -dynamic | osElfTarget os , arch == ArchPPC - = gopt Opt_PIC dflags || WayDyn `elem` ways dflags + = positionIndependent dflags || gopt Opt_ExternalDynamicRefs dflags -- PowerPC 64 Linux: always | osElfTarget os @@ -477,7 +482,8 @@ needImportedSymbols dflags arch os -- i386 (and others?): -dynamic but not -fPIC | osElfTarget os , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2 - = WayDyn `elem` ways dflags && not (gopt Opt_PIC dflags) + = gopt Opt_ExternalDynamicRefs dflags && + not (positionIndependent dflags) | otherwise = False @@ -499,7 +505,7 @@ gotLabel -- However, for PIC on x86, we need a small helper function. pprGotDeclaration :: DynFlags -> Arch -> OS -> SDoc pprGotDeclaration dflags ArchX86 OSDarwin - | gopt Opt_PIC dflags + | positionIndependent dflags = vcat [ text ".section __TEXT,__textcoal_nt,coalesced,no_toc", text ".weak_definition ___i686.get_pc_thunk.ax", @@ -540,7 +546,7 @@ pprGotDeclaration _ (ArchPPC_64 _) _ pprGotDeclaration dflags arch os | osElfTarget os , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2 - , not (gopt Opt_PIC dflags) + , not (positionIndependent dflags) = empty | osElfTarget os @@ -565,7 +571,7 @@ pprGotDeclaration _ _ _ pprImportedSymbol :: DynFlags -> Platform -> CLabel -> SDoc pprImportedSymbol dflags platform@(Platform { platformArch = ArchPPC, platformOS = OSDarwin }) importedLbl | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl - = case gopt Opt_PIC dflags of + = case positionIndependent dflags of False -> vcat [ text ".symbol_stub", @@ -619,7 +625,7 @@ pprImportedSymbol dflags platform@(Platform { platformArch = ArchPPC, platformOS pprImportedSymbol dflags platform@(Platform { platformArch = ArchX86, platformOS = OSDarwin }) importedLbl | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl - = case gopt Opt_PIC dflags of + = case positionIndependent dflags of False -> vcat [ text ".symbol_stub", @@ -652,7 +658,7 @@ pprImportedSymbol dflags platform@(Platform { platformArch = ArchX86, platformOS text "\tjmp dyld_stub_binding_helper" ] $+$ vcat [ text ".section __DATA, __la_sym_ptr" - <> (if gopt Opt_PIC dflags then int 2 else int 3) + <> (if positionIndependent dflags then int 2 else int 3) <> text ",lazy_symbol_pointers", text "L" <> pprCLabel platform lbl <> ptext (sLit "$lazy_ptr:"), text "\t.indirect_symbol" <+> pprCLabel platform lbl, diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 1e88a1d025..f246ec36f1 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -25,6 +25,8 @@ where #include "../includes/MachDeps.h" -- NCG stuff: +import GhcPrelude + import CodeGen.Platform import PPC.Instr import PPC.Cond @@ -52,7 +54,6 @@ import Hoopl.Graph -- The rest: import OrdList import Outputable -import Unique import DynFlags import Control.Monad ( mapAndUnzipM, when ) @@ -90,13 +91,23 @@ cmmTopCodeGen (CmmProc info lab live graph) = do case picBaseMb of Just picBase -> initializePicBase_ppc arch os picBase tops Nothing -> return tops - ArchPPC_64 ELF_V1 -> return tops + ArchPPC_64 ELF_V1 -> fixup_entry tops -- generating function descriptor is handled in -- pretty printer - ArchPPC_64 ELF_V2 -> return tops + ArchPPC_64 ELF_V2 -> fixup_entry tops -- generating function prologue is handled in -- pretty printer _ -> panic "PPC.cmmTopCodeGen: unknown arch" + where + fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics) + = do + let BasicBlock bID insns = entry + bID' <- if lab == (blockLbl bID) + then newBlockId + else return bID + let b' = BasicBlock bID' insns + return (CmmProc info lab live (ListGraph (b':blocks)) : statics) + fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc" cmmTopCodeGen (CmmData sec dat) = do return [CmmData sec dat] -- no translation, we just use CmmStatic @@ -161,8 +172,8 @@ stmtToInstrs stmt = do -> genCCall target result_regs args CmmBranch id -> genBranch id - CmmCondBranch arg true false _ -> do - b1 <- genCondJump true arg + CmmCondBranch arg true false prediction -> do + b1 <- genCondJump true arg prediction b2 <- genBranch false return (b1 `appOL` b2) CmmSwitch arg ids -> do dflags <- getDynFlags @@ -214,7 +225,7 @@ getRegisterReg platform (CmmGlobal mid) jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = mkAsmTempLabel (getUnique blockid) + where blockLabel = blockLbl blockid @@ -371,6 +382,14 @@ iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do mov_lo = MR rlo expr_reg return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) rlo + +iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do + (expr_reg,expr_code) <- getSomeReg expr + (rlo, rhi) <- getNewRegPairNat II32 + let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31)) + mov_lo = MR rlo expr_reg + return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) + rlo iselExpr64 expr = pprPanic "iselExpr64(powerpc)" (pprExpr expr) @@ -719,7 +738,7 @@ data Amode = Amode AddrMode InstrBlock {- -Now, given a tree (the argument to an CmmLoad) that references memory, +Now, given a tree (the argument to a CmmLoad) that references memory, produce a suitable addressing mode. A Rule of the Game (tm) for Amodes: use of the addr bit must @@ -1070,11 +1089,12 @@ comparison to do. genCondJump :: BlockId -- the branch target -> CmmExpr -- the condition on which to branch + -> Maybe Bool -> NatM InstrBlock -genCondJump id bool = do +genCondJump id bool prediction = do CondCode _ cond code <- getCondCode bool - return (code `snocOL` BCC cond id) + return (code `snocOL` BCC cond id prediction) @@ -1098,6 +1118,90 @@ genCCall (PrimTarget MO_Touch) _ _ genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ = return $ nilOL +genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] + = do dflags <- getDynFlags + let platform = targetPlatform dflags + fmt = intFormat width + reg_dst = getRegisterReg platform (CmmLocal dst) + (instr, n_code) <- case amop of + AMO_Add -> getSomeRegOrImm ADD True reg_dst + AMO_Sub -> case n of + CmmLit (CmmInt i _) + | Just imm <- makeImmediate width True (-i) + -> return (ADD reg_dst reg_dst (RIImm imm), nilOL) + _ + -> do + (n_reg, n_code) <- getSomeReg n + return (SUBF reg_dst n_reg reg_dst, n_code) + AMO_And -> getSomeRegOrImm AND False reg_dst + AMO_Nand -> do (n_reg, n_code) <- getSomeReg n + return (NAND reg_dst reg_dst n_reg, n_code) + AMO_Or -> getSomeRegOrImm OR False reg_dst + AMO_Xor -> getSomeRegOrImm XOR False reg_dst + Amode addr_reg addr_code <- getAmodeIndex addr + lbl_retry <- getBlockIdNat + return $ n_code `appOL` addr_code + `appOL` toOL [ HWSYNC + , BCC ALWAYS lbl_retry Nothing + + , NEWBLOCK lbl_retry + , LDR fmt reg_dst addr_reg + , instr + , STC fmt reg_dst addr_reg + , BCC NE lbl_retry (Just False) + , ISYNC + ] + where + getAmodeIndex (CmmMachOp (MO_Add _) [x, y]) + = do + (regX, codeX) <- getSomeReg x + (regY, codeY) <- getSomeReg y + return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) + getAmodeIndex other + = do + (reg, code) <- getSomeReg other + return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here! + getSomeRegOrImm op sign dst + = case n of + CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i + -> return (op dst dst (RIImm imm), nilOL) + _ + -> do + (n_reg, n_code) <- getSomeReg n + return (op dst dst (RIReg n_reg), n_code) + +genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr] + = do dflags <- getDynFlags + let platform = targetPlatform dflags + fmt = intFormat width + reg_dst = getRegisterReg platform (CmmLocal dst) + form = if widthInBits width == 64 then DS else D + Amode addr_reg addr_code <- getAmode form addr + lbl_end <- getBlockIdNat + return $ addr_code `appOL` toOL [ HWSYNC + , LD fmt reg_dst addr_reg + , CMP fmt reg_dst (RIReg reg_dst) + , BCC NE lbl_end (Just False) + , BCC ALWAYS lbl_end Nothing + -- See Note [Seemingly useless cmp and bne] + , NEWBLOCK lbl_end + , ISYNC + ] + +-- Note [Seemingly useless cmp and bne] +-- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction +-- the second paragraph says that isync may complete before storage accesses +-- "associated" with a preceding instruction have been performed. The cmp +-- operation and the following bne introduce a data and control dependency +-- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe +-- Fetch). +-- This is also what gcc does. + + +genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do + code <- assignMem_IntCode (intFormat width) addr val + return $ unitOL(HWSYNC) `appOL` code + genCCall (PrimTarget (MO_Clz width)) [dst] [src] = do dflags <- getDynFlags let platform = targetPlatform dflags @@ -1110,17 +1214,17 @@ genCCall (PrimTarget (MO_Clz width)) [dst] [src] lbl3 <- getBlockIdNat let vr_hi = getHiVRegFromLo vr_lo cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0)) - , BCC NE lbl2 - , BCC ALWAYS lbl1 + , BCC NE lbl2 Nothing + , BCC ALWAYS lbl1 Nothing , NEWBLOCK lbl1 , CNTLZ II32 reg_dst vr_lo , ADD reg_dst reg_dst (RIImm (ImmInt 32)) - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl2 , CNTLZ II32 reg_dst vr_hi - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl3 ] @@ -1167,8 +1271,8 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src] cnttzlo <- cnttz format reg_dst vr_lo let vr_hi = getHiVRegFromLo vr_lo cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0)) - , BCC NE lbl2 - , BCC ALWAYS lbl1 + , BCC NE lbl2 Nothing + , BCC ALWAYS lbl1 Nothing , NEWBLOCK lbl1 , ADD x' vr_hi (RIImm (ImmInt (-1))) @@ -1176,12 +1280,12 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src] , CNTLZ format r' x'' -- 32 + (32 - clz(x'')) , SUBFC reg_dst r' (RIImm (ImmInt 64)) - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl2 ] `appOL` cnttzlo `appOL` - toOL [ BCC ALWAYS lbl3 + toOL [ BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl3 ] @@ -1229,6 +1333,7 @@ genCCall target dest_regs argsAndHints PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs argsAndHints PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints + PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width dest_regs argsAndHints @@ -1315,21 +1420,21 @@ genCCall target dest_regs argsAndHints -- rhat = un32 - q1*vn1 , MULL fmt tmp q1 (RIReg vn1) , SUBF rhat tmp un32 - , BCC ALWAYS again1 + , BCC ALWAYS again1 Nothing , NEWBLOCK again1 -- if (q1 >= b || q1*vn0 > b*rhat + un1) , CMPL fmt q1 (RIReg b) - , BCC GEU then1 - , BCC ALWAYS no1 + , BCC GEU then1 Nothing + , BCC ALWAYS no1 Nothing , NEWBLOCK no1 , MULL fmt tmp q1 (RIReg vn0) , SL fmt tmp1 rhat (RIImm (ImmInt half)) , ADD tmp1 tmp1 (RIReg un1) , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif1 - , BCC ALWAYS then1 + , BCC LEU endif1 Nothing + , BCC ALWAYS then1 Nothing , NEWBLOCK then1 -- q1 = q1 - 1 @@ -1338,8 +1443,8 @@ genCCall target dest_regs argsAndHints , ADD rhat rhat (RIReg vn1) -- if (rhat < b) goto again1 , CMPL fmt rhat (RIReg b) - , BCC LTT again1 - , BCC ALWAYS endif1 + , BCC LTT again1 Nothing + , BCC ALWAYS endif1 Nothing , NEWBLOCK endif1 -- un21 = un32*b + un1 - q1*v @@ -1353,21 +1458,21 @@ genCCall target dest_regs argsAndHints -- rhat = un21- q0*vn1 , MULL fmt tmp q0 (RIReg vn1) , SUBF rhat tmp un21 - , BCC ALWAYS again2 + , BCC ALWAYS again2 Nothing , NEWBLOCK again2 -- if (q0>b || q0*vn0 > b*rhat + un0) , CMPL fmt q0 (RIReg b) - , BCC GEU then2 - , BCC ALWAYS no2 + , BCC GEU then2 Nothing + , BCC ALWAYS no2 Nothing , NEWBLOCK no2 , MULL fmt tmp q0 (RIReg vn0) , SL fmt tmp1 rhat (RIImm (ImmInt half)) , ADD tmp1 tmp1 (RIReg un0) , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif2 - , BCC ALWAYS then2 + , BCC LEU endif2 Nothing + , BCC ALWAYS then2 Nothing , NEWBLOCK then2 -- q0 = q0 - 1 @@ -1376,8 +1481,8 @@ genCCall target dest_regs argsAndHints , ADD rhat rhat (RIReg vn1) -- if (rhat<b) goto again2 , CMPL fmt rhat (RIReg b) - , BCC LTT again2 - , BCC ALWAYS endif2 + , BCC LTT again2 Nothing + , BCC ALWAYS endif2 Nothing , NEWBLOCK endif2 -- compute remainder @@ -1419,6 +1524,11 @@ genCCall target dest_regs argsAndHints add2Op _ _ _ = panic "genCCall: Wrong number of arguments/results for add2" + addcOp platform [res_r, res_c] [arg_x, arg_y] + = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y] + addcOp _ _ _ + = panic "genCCall: Wrong number of arguments/results for addc" + -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1, -- which is 0 for borrow and 1 otherwise. We need 1 and 0 -- so xor with 1. @@ -1598,7 +1708,7 @@ genCCall' dflags gcp target dest_regs args uses_pic_base_implicitly = do -- See Note [implicit register in PPC PIC code] -- on why we claim to use PIC register here - when (gopt Opt_PIC dflags && target32Bit platform) $ do + when (positionIndependent dflags && target32Bit platform) $ do _ <- getPicBaseNat $ archWordFormat True return () @@ -1881,6 +1991,10 @@ genCCall' dflags gcp target dest_regs args MO_F32_Tanh -> (fsLit "tanh", True) MO_F32_Pwr -> (fsLit "pow", True) + MO_F32_Asinh -> (fsLit "asinh", True) + MO_F32_Acosh -> (fsLit "acosh", True) + MO_F32_Atanh -> (fsLit "atanh", True) + MO_F64_Exp -> (fsLit "exp", False) MO_F64_Log -> (fsLit "log", False) MO_F64_Sqrt -> (fsLit "sqrt", False) @@ -1899,32 +2013,40 @@ genCCall' dflags gcp target dest_regs args MO_F64_Tanh -> (fsLit "tanh", False) MO_F64_Pwr -> (fsLit "pow", False) + MO_F64_Asinh -> (fsLit "asinh", False) + MO_F64_Acosh -> (fsLit "acosh", False) + MO_F64_Atanh -> (fsLit "atanh", False) + MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False) MO_Memcpy _ -> (fsLit "memcpy", False) MO_Memset _ -> (fsLit "memset", False) MO_Memmove _ -> (fsLit "memmove", False) + MO_Memcmp _ -> (fsLit "memcmp", False) MO_BSwap w -> (fsLit $ bSwapLabel w, False) MO_PopCnt w -> (fsLit $ popCntLabel w, False) - MO_Clz w -> (fsLit $ clzLabel w, False) - MO_Ctz w -> (fsLit $ ctzLabel w, False) - MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False) + MO_Pdep w -> (fsLit $ pdepLabel w, False) + MO_Pext w -> (fsLit $ pextLabel w, False) + MO_Clz _ -> unsupported + MO_Ctz _ -> unsupported + MO_AtomicRMW {} -> unsupported MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False) - MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False) - MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False) + MO_AtomicRead _ -> unsupported + MO_AtomicWrite _ -> unsupported MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported - (MO_Prefetch_Data _ ) -> unsupported + MO_Prefetch_Data _ -> unsupported unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported") @@ -1950,7 +2072,7 @@ genSwitch dflags expr targets ] return code - | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags) + | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags) = do (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) let fmt = archWordFormat $ target32Bit $ targetPlatform dflags @@ -1988,15 +2110,16 @@ generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl CmmStatics Instr) generateJumpTableForInstr dflags (BCTR ids (Just lbl)) = let jumpTable - | (gopt Opt_PIC dflags) + | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags) = map jumpTableEntryRel ids | otherwise = map (jumpTableEntry dflags) ids where jumpTableEntryRel Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) jumpTableEntryRel (Just blockid) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) - where blockLabel = mkAsmTempLabel (getUnique blockid) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 + (wordWidth dflags)) + where blockLabel = blockLbl blockid in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable)) generateJumpTableForInstr _ _ = Nothing diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs index 0e4b1fd701..bd8bdee81a 100644 --- a/compiler/nativeGen/PPC/Cond.hs +++ b/compiler/nativeGen/PPC/Cond.hs @@ -8,6 +8,8 @@ module PPC.Cond ( where +import GhcPrelude + import Panic data Cond diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index eb179c5a99..8eb5e8fa8d 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -23,6 +23,8 @@ module PPC.Instr ( where +import GhcPrelude + import PPC.Regs import PPC.Cond import Instruction @@ -75,19 +77,19 @@ instance Instruction Instr where mkStackDeallocInstr = ppc_mkStackDeallocInstr -ppc_mkStackAllocInstr :: Platform -> Int -> Instr +ppc_mkStackAllocInstr :: Platform -> Int -> [Instr] ppc_mkStackAllocInstr platform amount = ppc_mkStackAllocInstr' platform (-amount) -ppc_mkStackDeallocInstr :: Platform -> Int -> Instr +ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr] ppc_mkStackDeallocInstr platform amount = ppc_mkStackAllocInstr' platform amount -ppc_mkStackAllocInstr' :: Platform -> Int -> Instr +ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr] ppc_mkStackAllocInstr' platform amount = case platformArch platform of - ArchPPC -> UPDATE_SP II32 (ImmInt amount) - ArchPPC_64 _ -> UPDATE_SP II64 (ImmInt amount) + ArchPPC -> [UPDATE_SP II32 (ImmInt amount)] + ArchPPC_64 _ -> [UPDATE_SP II64 (ImmInt amount)] _ -> panic $ "ppc_mkStackAllocInstr' " ++ show (platformArch platform) @@ -124,7 +126,7 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do insert_stack_insns (BasicBlock id insns) | Just new_blockid <- mapLookup id new_blockmap - = [ BasicBlock id [alloc, BCC ALWAYS new_blockid] + = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing] , BasicBlock new_blockid block' ] | otherwise @@ -137,11 +139,11 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do -- "labeled-goto" we use JMP, and for "computed-goto" we -- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'. = case insn of - JMP _ -> dealloc : insn : r - BCTR [] Nothing -> dealloc : insn : r + JMP _ -> dealloc ++ (insn : r) + BCTR [] Nothing -> dealloc ++ (insn : r) BCTR ids label -> BCTR (map (fmap retarget) ids) label : r - BCCFAR cond b -> BCCFAR cond (retarget b) : r - BCC cond b -> BCC cond (retarget b) : r + BCCFAR cond b p -> BCCFAR cond (retarget b) p : r + BCC cond b p -> BCC cond (retarget b) p : r _ -> insn : r -- BL and BCTRL are call-like instructions rather than -- jumps, and are used only for C calls. @@ -190,10 +192,12 @@ data Instr -- Loads and stores. | LD Format Reg AddrMode -- Load format, dst, src | LDFAR Format Reg AddrMode -- Load format, dst, src 32 bit offset + | LDR Format Reg AddrMode -- Load and reserve format, dst, src | LA Format Reg AddrMode -- Load arithmetic format, dst, src | ST Format Reg AddrMode -- Store format, src, dst | STFAR Format Reg AddrMode -- Store format, src, dst 32 bit offset | STU Format Reg AddrMode -- Store with Update format, src, dst + | STC Format Reg AddrMode -- Store conditional format, src, dst | LIS Reg Imm -- Load Immediate Shifted dst, src | LI Reg Imm -- Load Immediate dst, src | MR Reg Reg -- Move Register dst, src -- also for fmr @@ -201,8 +205,12 @@ data Instr | CMP Format Reg RI -- format, src1, src2 | CMPL Format Reg RI -- format, src1, src2 - | BCC Cond BlockId - | BCCFAR Cond BlockId + | BCC Cond BlockId (Maybe Bool) -- cond, block, hint + | BCCFAR Cond BlockId (Maybe Bool) -- cond, block, hint + -- hint: + -- Just True: branch likely taken + -- Just False: branch likely not taken + -- Nothing: no hint | JMP CLabel -- same as branch, -- but with CLabel instead of block ID | MTCTR Reg @@ -232,6 +240,7 @@ data Instr | DIV Format Bool Reg Reg Reg | AND Reg Reg RI -- dst, src1, src2 | ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2 + | NAND Reg Reg Reg -- dst, src1, src2 | OR Reg Reg RI -- dst, src1, src2 | ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2 | XOR Reg Reg RI -- dst, src1, src2 @@ -272,6 +281,8 @@ data Instr | MFLR Reg -- move from link register | FETCHPC Reg -- pseudo-instruction: -- bcl to next insn, mflr reg + | HWSYNC -- heavy weight sync + | ISYNC -- instruction synchronize | LWSYNC -- memory barrier | NOP -- no operation, PowerPC 64 bit -- needs this as place holder to @@ -290,17 +301,19 @@ ppc_regUsageOfInstr platform instr = case instr of LD _ reg addr -> usage (regAddr addr, [reg]) LDFAR _ reg addr -> usage (regAddr addr, [reg]) + LDR _ reg addr -> usage (regAddr addr, [reg]) LA _ reg addr -> usage (regAddr addr, [reg]) ST _ reg addr -> usage (reg : regAddr addr, []) STFAR _ reg addr -> usage (reg : regAddr addr, []) STU _ reg addr -> usage (reg : regAddr addr, []) + STC _ reg addr -> usage (reg : regAddr addr, []) LIS reg _ -> usage ([], [reg]) LI reg _ -> usage ([], [reg]) MR reg1 reg2 -> usage ([reg2], [reg1]) CMP _ reg ri -> usage (reg : regRI ri,[]) CMPL _ reg ri -> usage (reg : regRI ri,[]) - BCC _ _ -> noUsage - BCCFAR _ _ -> noUsage + BCC _ _ _ -> noUsage + BCCFAR _ _ _ -> noUsage MTCTR reg -> usage ([reg],[]) BCTR _ _ -> noUsage BL _ params -> usage (params, callClobberedRegs platform) @@ -325,6 +338,7 @@ ppc_regUsageOfInstr platform instr AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + NAND reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ORIS reg1 reg2 _ -> usage ([reg2], [reg1]) XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) @@ -380,17 +394,19 @@ ppc_patchRegsOfInstr instr env = case instr of LD fmt reg addr -> LD fmt (env reg) (fixAddr addr) LDFAR fmt reg addr -> LDFAR fmt (env reg) (fixAddr addr) + LDR fmt reg addr -> LDR fmt (env reg) (fixAddr addr) LA fmt reg addr -> LA fmt (env reg) (fixAddr addr) ST fmt reg addr -> ST fmt (env reg) (fixAddr addr) STFAR fmt reg addr -> STFAR fmt (env reg) (fixAddr addr) STU fmt reg addr -> STU fmt (env reg) (fixAddr addr) + STC fmt reg addr -> STC fmt (env reg) (fixAddr addr) LIS reg imm -> LIS (env reg) imm LI reg imm -> LI (env reg) imm MR reg1 reg2 -> MR (env reg1) (env reg2) CMP fmt reg ri -> CMP fmt (env reg) (fixRI ri) CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri) - BCC cond lbl -> BCC cond lbl - BCCFAR cond lbl -> BCCFAR cond lbl + BCC cond lbl p -> BCC cond lbl p + BCCFAR cond lbl p -> BCCFAR cond lbl p MTCTR reg -> MTCTR (env reg) BCTR targets lbl -> BCTR targets lbl BL imm argRegs -> BL imm argRegs -- argument regs @@ -417,6 +433,7 @@ ppc_patchRegsOfInstr instr env AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri) ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3) + NAND reg1 reg2 reg3 -> NAND (env reg1) (env reg2) (env reg3) OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri) ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri) @@ -480,8 +497,8 @@ ppc_isJumpishInstr instr ppc_jumpDestsOfInstr :: Instr -> [BlockId] ppc_jumpDestsOfInstr insn = case insn of - BCC _ id -> [id] - BCCFAR _ id -> [id] + BCC _ id _ -> [id] + BCCFAR _ id _ -> [id] BCTR targets _ -> [id | Just id <- targets] _ -> [] @@ -492,8 +509,8 @@ ppc_jumpDestsOfInstr insn ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr ppc_patchJumpInstr insn patchF = case insn of - BCC cc id -> BCC cc (patchF id) - BCCFAR cc id -> BCCFAR cc (patchF id) + BCC cc id p -> BCC cc (patchF id) p + BCCFAR cc id p -> BCCFAR cc (patchF id) p BCTR ids lbl -> BCTR (map (fmap patchF) ids) lbl _ -> insn @@ -631,16 +648,12 @@ ppc_mkRegRegMoveInstr src dst -- | Make an unconditional jump instruction. --- For architectures with branch delay slots, its ok to put --- a NOP after the jump. Don't fill the delay slot with an --- instruction that references regs or you'll confuse the --- linear allocator. ppc_mkJumpInstr :: BlockId -> [Instr] ppc_mkJumpInstr id - = [BCC ALWAYS id] + = [BCC ALWAYS id Nothing] -- | Take the source and destination from this reg -> reg move instruction @@ -669,12 +682,12 @@ makeFarBranches info_env blocks handleBlock addr (BasicBlock id instrs) = BasicBlock id (zipWith makeFar [addr..] instrs) - makeFar _ (BCC ALWAYS tgt) = BCC ALWAYS tgt - makeFar addr (BCC cond tgt) + makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing + makeFar addr (BCC cond tgt p) | abs (addr - targetAddr) >= nearLimit - = BCCFAR cond tgt + = BCCFAR cond tgt p | otherwise - = BCC cond tgt + = BCC cond tgt p where Just targetAddr = lookupUFM blockAddressMap tgt makeFar _ other = other diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index 63d01c3913..2f64d82ee5 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -9,6 +9,8 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} module PPC.Ppr (pprNatCmmDecl) where +import GhcPrelude + import PPC.Regs import PPC.Instr import PPC.Cond @@ -23,9 +25,10 @@ import Cmm hiding (topInfoTable) import Hoopl.Collections import Hoopl.Label +import BlockId import CLabel -import Unique ( pprUniqueAlways, Uniquable(..) ) +import Unique ( pprUniqueAlways, getUnique ) import Platform import FastString import Outputable @@ -78,19 +81,17 @@ pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = pprFunctionDescriptor :: CLabel -> SDoc pprFunctionDescriptor lab = pprGloblDecl lab - $$ text ".section \".opd\",\"aw\"" - $$ text ".align 3" + $$ text "\t.section \".opd\", \"aw\"" + $$ text "\t.align 3" $$ ppr lab <> char ':' - $$ text ".quad ." - <> ppr lab - <> text ",.TOC.@tocbase,0" - $$ text ".previous" - $$ text ".type " - <> ppr lab - <> text ", @function" - $$ char '.' - <> ppr lab - <> char ':' + $$ text "\t.quad ." + <> ppr lab + <> text ",.TOC.@tocbase,0" + $$ text "\t.previous" + $$ text "\t.type" + <+> ppr lab + <> text ", @function" + $$ char '.' <> ppr lab <> char ':' pprFunctionPrologue :: CLabel ->SDoc pprFunctionPrologue lab = pprGloblDecl lab @@ -108,7 +109,7 @@ pprFunctionPrologue lab = pprGloblDecl lab pprBasicBlock :: LabelMap CmmStatics -> NatBasicBlock Instr -> SDoc pprBasicBlock info_env (BasicBlock blockid instrs) = maybe_infotable $$ - pprLabel (mkAsmTempLabel (getUnique blockid)) $$ + pprLabel (blockLbl blockid) $$ vcat (map pprInstr instrs) where maybe_infotable = case mapLookup blockid info_env of @@ -310,11 +311,13 @@ pprImm (HIGHESTA i) pprAddr :: AddrMode -> SDoc pprAddr (AddrRegReg r1 r2) - = pprReg r1 <+> text ", " <+> pprReg r2 - -pprAddr (AddrRegImm r1 (ImmInt i)) = hcat [ int i, char '(', pprReg r1, char ')' ] -pprAddr (AddrRegImm r1 (ImmInteger i)) = hcat [ integer i, char '(', pprReg r1, char ')' ] -pprAddr (AddrRegImm r1 imm) = hcat [ pprImm imm, char '(', pprReg r1, char ')' ] + = pprReg r1 <> char ',' <+> pprReg r2 +pprAddr (AddrRegImm r1 (ImmInt i)) + = hcat [ int i, char '(', pprReg r1, char ')' ] +pprAddr (AddrRegImm r1 (ImmInteger i)) + = hcat [ integer i, char '(', pprReg r1, char ')' ] +pprAddr (AddrRegImm r1 imm) + = hcat [ pprImm imm, char '(', pprReg r1, char ')' ] pprSectionAlign :: Section -> SDoc @@ -450,15 +453,27 @@ pprInstr (LD fmt reg addr) = hcat [ text ", ", pprAddr addr ] + pprInstr (LDFAR fmt reg (AddrRegImm source off)) = sdocWithPlatform $ \platform -> vcat [ pprInstr (ADDIS (tmpReg platform) source (HA off)), pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off))) ] - pprInstr (LDFAR _ _ _) = panic "PPC.Ppr.pprInstr LDFAR: no match" +pprInstr (LDR fmt reg1 addr) = hcat [ + text "\tl", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC.Ppr.Instr LDR: no match", + text "arx\t", + pprReg reg1, + text ", ", + pprAddr addr + ] + pprInstr (LA fmt reg addr) = hcat [ char '\t', text "l", @@ -508,6 +523,17 @@ pprInstr (STU fmt reg addr) = hcat [ text ", ", pprAddr addr ] +pprInstr (STC fmt reg1 addr) = hcat [ + text "\tst", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC.Ppr.Instr STC: no match", + text "cx.\t", + pprReg reg1, + text ", ", + pprAddr addr + ] pprInstr (LIS reg imm) = hcat [ char '\t', text "lis", @@ -569,19 +595,25 @@ pprInstr (CMPL fmt reg ri) = hcat [ RIReg _ -> empty RIImm _ -> char 'i' ] -pprInstr (BCC cond blockid) = hcat [ +pprInstr (BCC cond blockid prediction) = hcat [ char '\t', text "b", pprCond cond, + pprPrediction prediction, char '\t', ppr lbl ] - where lbl = mkAsmTempLabel (getUnique blockid) + where lbl = mkLocalBlockLabel (getUnique blockid) + pprPrediction p = case p of + Nothing -> empty + Just True -> char '+' + Just False -> char '-' -pprInstr (BCCFAR cond blockid) = vcat [ +pprInstr (BCCFAR cond blockid prediction) = vcat [ hcat [ text "\tb", pprCond (condNegate cond), + neg_prediction, text "\t$+8" ], hcat [ @@ -589,7 +621,11 @@ pprInstr (BCCFAR cond blockid) = vcat [ ppr lbl ] ] - where lbl = mkAsmTempLabel (getUnique blockid) + where lbl = mkLocalBlockLabel (getUnique blockid) + neg_prediction = case prediction of + Nothing -> empty + Just True -> char '-' + Just False -> char '+' pprInstr (JMP lbl) -- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL" @@ -741,6 +777,7 @@ pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [ ] pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3) +pprInstr (NAND reg1 reg2 reg3) = pprLogic (sLit "nand") reg1 reg2 (RIReg reg3) pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri @@ -922,6 +959,10 @@ pprInstr (FETCHPC reg) = vcat [ hcat [ text "1:\tmflr\t", pprReg reg ] ] +pprInstr HWSYNC = text "\tsync" + +pprInstr ISYNC = text "\tisync" + pprInstr LWSYNC = text "\tlwsync" pprInstr NOP = text "\tnop" diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs index c4724d4193..30a07b9440 100644 --- a/compiler/nativeGen/PPC/RegInfo.hs +++ b/compiler/nativeGen/PPC/RegInfo.hs @@ -20,6 +20,8 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" +import GhcPrelude + import PPC.Instr import BlockId @@ -49,14 +51,14 @@ shortcutStatics fn (Statics lbl statics) shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel shortcutLabel fn lab - | Just uq <- maybeAsmTemp lab = shortBlockId fn (mkBlockId uq) - | otherwise = lab + | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId + | otherwise = lab shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic shortcutStatic fn (CmmStaticLit (CmmLabel lab)) = CmmStaticLit (CmmLabel (shortcutLabel fn lab)) -shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) - = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w)) + = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w) -- slightly dodgy, we're ignoring the second label, but this -- works with the way we use CmmLabelDiffOff for jump tables now. shortcutStatic _ other_static @@ -69,6 +71,6 @@ shortBlockId shortBlockId fn blockid = case fn blockid of - Nothing -> mkAsmTempLabel uq + Nothing -> mkLocalBlockLabel uq Just (DestBlockId blockid') -> shortBlockId fn blockid' where uq = getUnique blockid diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs index a1befc7837..227517be88 100644 --- a/compiler/nativeGen/PPC/Regs.hs +++ b/compiler/nativeGen/PPC/Regs.hs @@ -50,6 +50,8 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" +import GhcPrelude + import Reg import RegClass import Format @@ -70,7 +72,7 @@ import Data.Int ( Int8, Int16, Int32, Int64 ) -- squeese functions for the graph allocator ----------------------------------- -- | regSqueeze_class reg --- Calculuate the maximum number of register colors that could be +-- Calculate the maximum number of register colors that could be -- denied to a node of this class due to having this reg -- as a neighbour. -- @@ -163,7 +165,7 @@ litToImm (CmmFloat f W32) = ImmFloat f litToImm (CmmFloat f W64) = ImmDouble f litToImm (CmmLabel l) = ImmCLbl l litToImm (CmmLabelOff l off) = ImmIndex l off -litToImm (CmmLabelDiffOff l1 l2 off) +litToImm (CmmLabelDiffOff l1 l2 off _) = ImmConstantSum (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) (ImmInt off) diff --git a/compiler/nativeGen/PprBase.hs b/compiler/nativeGen/PprBase.hs index aca427449d..d96b18783d 100644 --- a/compiler/nativeGen/PprBase.hs +++ b/compiler/nativeGen/PprBase.hs @@ -16,6 +16,8 @@ module PprBase ( where +import GhcPrelude + import AsmUtils import CLabel import Cmm diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs index 598074d881..d9d56d47c4 100644 --- a/compiler/nativeGen/Reg.hs +++ b/compiler/nativeGen/Reg.hs @@ -26,6 +26,8 @@ module Reg ( where +import GhcPrelude + import Outputable import Unique import RegClass diff --git a/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs b/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs index 6771e4ecb9..634e61cb13 100644 --- a/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs +++ b/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs @@ -21,6 +21,8 @@ module RegAlloc.Graph.ArchBase ( bound, squeese ) where +import GhcPrelude + import UniqSet import UniqFM import Unique diff --git a/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs b/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs index 439899071a..0472e4cf09 100644 --- a/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs +++ b/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs @@ -14,9 +14,14 @@ module RegAlloc.Graph.ArchX86 ( worst, squeese, ) where + +import GhcPrelude + import RegAlloc.Graph.ArchBase (Reg(..), RegSub(..), RegClass(..)) import UniqSet +import qualified Data.Array as A + -- | Determine the class of a register classOfReg :: Reg -> RegClass @@ -57,18 +62,28 @@ regName :: Reg -> Maybe String regName reg = case reg of Reg ClassG32 i - | i <= 7-> Just $ [ "eax", "ebx", "ecx", "edx" - , "ebp", "esi", "edi", "esp" ] !! i + | i <= 7 -> + let names = A.listArray (0,8) + [ "eax", "ebx", "ecx", "edx" + , "ebp", "esi", "edi", "esp" ] + in Just $ names A.! i RegSub SubL16 (Reg ClassG32 i) - | i <= 7 -> Just $ [ "ax", "bx", "cx", "dx" - , "bp", "si", "di", "sp"] !! i + | i <= 7 -> + let names = A.listArray (0,8) + [ "ax", "bx", "cx", "dx" + , "bp", "si", "di", "sp"] + in Just $ names A.! i RegSub SubL8 (Reg ClassG32 i) - | i <= 3 -> Just $ [ "al", "bl", "cl", "dl"] !! i + | i <= 3 -> + let names = A.listArray (0,4) [ "al", "bl", "cl", "dl"] + in Just $ names A.! i RegSub SubL8H (Reg ClassG32 i) - | i <= 3 -> Just $ [ "ah", "bh", "ch", "dh"] !! i + | i <= 3 -> + let names = A.listArray (0,4) [ "ah", "bh", "ch", "dh"] + in Just $ names A.! i _ -> Nothing diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs index 7e8047f29f..5ca2412c73 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs @@ -3,6 +3,8 @@ module RegAlloc.Graph.Coalesce ( regCoalesce, slurpJoinMovs ) where +import GhcPrelude + import RegAlloc.Liveness import Instruction import Reg @@ -14,8 +16,6 @@ import UniqFM import UniqSet import UniqSupply -import Data.List - -- | Do register coalescing on this top level thing -- @@ -62,7 +62,7 @@ sinkReg fm r -- | Slurp out mov instructions that only serve to join live ranges. -- --- During a mov, if the source reg dies and the destiation reg is +-- During a mov, if the source reg dies and the destination reg is -- born then we can rename the two regs to the same thing and -- eliminate the move. slurpJoinMovs diff --git a/compiler/nativeGen/RegAlloc/Graph/Main.hs b/compiler/nativeGen/RegAlloc/Graph/Main.hs index 08538453f7..4c17d930ea 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Main.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Main.hs @@ -4,6 +4,8 @@ module RegAlloc.Graph.Main ( regAlloc ) where +import GhcPrelude + import qualified GraphColor as Color import RegAlloc.Liveness import RegAlloc.Graph.Spill @@ -25,7 +27,6 @@ import UniqSet import UniqSupply import Util (seqList) -import Data.List import Data.Maybe import Control.Monad diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs index 0014ab6fed..bce24bdd3c 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Spill.hs @@ -7,6 +7,8 @@ module RegAlloc.Graph.Spill ( SpillStats(..), accSpillSL ) where +import GhcPrelude + import RegAlloc.Liveness import Instruction import Reg @@ -34,7 +36,7 @@ import qualified Data.IntSet as IntSet -- TODO: See if we can split some of the live ranges instead of just globally -- spilling the virtual reg. This might make the spill cleaner's job easier. -- --- TODO: On CISCy x86 and x86_64 we don't nessesarally have to add a mov instruction +-- TODO: On CISCy x86 and x86_64 we don't necessarily have to add a mov instruction -- when making spills. If an instr is using a spilled virtual we may be able to -- address the spill slot directly. -- @@ -111,8 +113,8 @@ regSpill_top platform regSlotMap cmm -- after we've done a successful allocation. let liveSlotsOnEntry' :: BlockMap IntSet liveSlotsOnEntry' - = mapFoldWithKey patchLiveSlot - liveSlotsOnEntry liveVRegsOnEntry + = mapFoldlWithKey patchLiveSlot + liveSlotsOnEntry liveVRegsOnEntry let info' = LiveInfo static firstId @@ -129,10 +131,9 @@ regSpill_top platform regSlotMap cmm -- then record the fact that these slots are now live in those blocks -- in the given slotmap. patchLiveSlot - :: BlockId -> RegSet - -> BlockMap IntSet -> BlockMap IntSet + :: BlockMap IntSet -> BlockId -> RegSet -> BlockMap IntSet - patchLiveSlot blockId regsLive slotMap + patchLiveSlot slotMap blockId regsLive = let -- Slots that are already recorded as being live. curSlotsLive = fromMaybe IntSet.empty diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs index faef4037c2..50001d7334 100644 --- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs +++ b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs @@ -28,6 +28,8 @@ module RegAlloc.Graph.SpillClean ( cleanSpills ) where +import GhcPrelude + import RegAlloc.Liveness import Instruction import Reg diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs index 9811f1a64b..f603b609df 100644 --- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs +++ b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs @@ -13,6 +13,8 @@ module RegAlloc.Graph.SpillCost ( lifeMapFromSpillCostInfo ) where +import GhcPrelude + import RegAlloc.Liveness import Instruction import RegClass diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs index 71956025b0..487e3ee03a 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Stats.hs @@ -16,6 +16,8 @@ module RegAlloc.Graph.Stats ( #include "nativeGen/NCG.h" +import GhcPrelude + import qualified GraphColor as Color import RegAlloc.Liveness import RegAlloc.Graph.Spill @@ -32,9 +34,6 @@ import UniqFM import UniqSet import State -import Data.List - - -- | Holds interesting statistics from the register allocator. data RegAllocStats statics instr @@ -265,8 +264,8 @@ pprStatsConflict stats $$ text "\n") --- | For every vreg, dump it's how many conflicts it has and its lifetime --- good for making a scatter plot. +-- | For every vreg, dump how many conflicts it has, and its lifetime. +-- Good for making a scatter plot. pprStatsLifeConflict :: [RegAllocStats statics instr] -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph @@ -277,10 +276,10 @@ pprStatsLifeConflict stats graph $ foldl' plusSpillCostInfo zeroSpillCostInfo $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ] - scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of - Just (_, l) -> l - Nothing -> 0 - Just node = Color.lookupNode graph r + scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of + Just (_, l) -> l + Nothing -> 0 + Just node = Color.lookupNode graph r in parens $ hcat $ punctuate (text ", ") [ doubleQuotes $ ppr $ Color.nodeId node , ppr $ sizeUniqSet (Color.nodeConflicts node) diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs index 204de846ae..7774985dce 100644 --- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs @@ -8,6 +8,8 @@ where #include "HsVersions.h" +import GhcPrelude + import RegClass import Reg diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs index d4f124e297..1172870729 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Base.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Base.hs @@ -17,6 +17,8 @@ module RegAlloc.Linear.Base ( where +import GhcPrelude + import RegAlloc.Linear.StackMap import RegAlloc.Liveness import Reg diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs index 9933f5bb49..b4e79432d8 100644 --- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs @@ -9,6 +9,8 @@ module RegAlloc.Linear.FreeRegs ( where +import GhcPrelude + import Reg import RegClass diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs index c262b2b059..89f496c409 100644 --- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs +++ b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs @@ -9,6 +9,8 @@ -- module RegAlloc.Linear.JoinToTargets (joinToTargets) where +import GhcPrelude + import RegAlloc.Linear.State import RegAlloc.Linear.Base import RegAlloc.Linear.FreeRegs diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs index 2ba682ad17..6171d8d20d 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs @@ -102,6 +102,8 @@ module RegAlloc.Linear.Main ( #include "HsVersions.h" +import GhcPrelude + import RegAlloc.Linear.State import RegAlloc.Linear.Base import RegAlloc.Linear.StackMap @@ -496,7 +498,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do -- debugging {- freeregs <- getFreeRegsR assig <- getAssigR - pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform }) trace "genRaInsn" + pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform } undefined) trace "genRaInsn" (ppr instr $$ text "r_dying = " <+> ppr r_dying $$ text "w_dying = " <+> ppr w_dying @@ -807,27 +809,29 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc -- case (3): we need to push something out to free up a register [] -> - do let keep' = map getUnique keep + do let inRegOrBoth (InReg _) = True + inRegOrBoth (InBoth _ _) = True + inRegOrBoth _ = False + let candidates' = + flip delListFromUFM keep $ + filterUFM inRegOrBoth $ + assig + -- This is non-deterministic but we do not + -- currently support deterministic code-generation. + -- See Note [Unique Determinism and code generation] + let candidates = nonDetUFMToList candidates' -- the vregs we could kick out that are already in a slot let candidates_inBoth = [ (temp, reg, mem) - | (temp, InBoth reg mem) <- nonDetUFMToList assig - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - , temp `notElem` keep' + | (temp, InBoth reg mem) <- candidates , targetClassOfRealReg platform reg == classOfVirtualReg r ] -- the vregs we could kick out that are only in a reg -- this would require writing the reg to a new slot before using it. let candidates_inReg = [ (temp, reg) - | (temp, InReg reg) <- nonDetUFMToList assig - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - , temp `notElem` keep' + | (temp, InReg reg) <- candidates , targetClassOfRealReg platform reg == classOfVirtualReg r ] let result diff --git a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs index 5d369249c7..581548212a 100644 --- a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs @@ -2,6 +2,8 @@ module RegAlloc.Linear.PPC.FreeRegs where +import GhcPrelude + import PPC.Regs import RegClass import Reg diff --git a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs index db4d6ba376..653b2707c9 100644 --- a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs @@ -3,6 +3,8 @@ module RegAlloc.Linear.SPARC.FreeRegs where +import GhcPrelude + import SPARC.Regs import RegClass import Reg diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs index 748fb98c30..95819c6fb3 100644 --- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs +++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs @@ -20,6 +20,8 @@ module RegAlloc.Linear.StackMap ( where +import GhcPrelude + import DynFlags import UniqFM import Unique diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs index 8b17d3ab88..6554188f41 100644 --- a/compiler/nativeGen/RegAlloc/Linear/State.hs +++ b/compiler/nativeGen/RegAlloc/Linear/State.hs @@ -31,6 +31,8 @@ module RegAlloc.Linear.State ( ) where +import GhcPrelude + import RegAlloc.Linear.Stats import RegAlloc.Linear.StackMap import RegAlloc.Linear.Base diff --git a/compiler/nativeGen/RegAlloc/Linear/Stats.hs b/compiler/nativeGen/RegAlloc/Linear/Stats.hs index 71dedaeb55..74f3c834d0 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Stats.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Stats.hs @@ -6,6 +6,8 @@ module RegAlloc.Linear.Stats ( where +import GhcPrelude + import RegAlloc.Linear.Base import RegAlloc.Liveness import Instruction @@ -13,7 +15,6 @@ import Instruction import UniqFM import Outputable -import Data.List import State -- | Build a map of how many times each reg was alloced, clobbered, loaded etc. diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs index ae4aa53254..65a566d1c3 100644 --- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs @@ -3,6 +3,8 @@ module RegAlloc.Linear.X86.FreeRegs where +import GhcPrelude + import X86.Regs import RegClass import Reg diff --git a/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs index 5a7f71e3f0..713b053356 100644 --- a/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs @@ -3,6 +3,8 @@ module RegAlloc.Linear.X86_64.FreeRegs where +import GhcPrelude + import X86.Regs import RegClass import Reg diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs index e66139786b..9d93564317 100644 --- a/compiler/nativeGen/RegAlloc/Liveness.hs +++ b/compiler/nativeGen/RegAlloc/Liveness.hs @@ -35,6 +35,8 @@ module RegAlloc.Liveness ( regLiveness, natCmmTopToLive ) where +import GhcPrelude + import Reg import Instruction @@ -145,10 +147,10 @@ instance Instruction instr => Instruction (InstrSR instr) where mkJumpInstr target = map Instr (mkJumpInstr target) mkStackAllocInstr platform amount = - Instr (mkStackAllocInstr platform amount) + Instr <$> mkStackAllocInstr platform amount mkStackDeallocInstr platform amount = - Instr (mkStackDeallocInstr platform amount) + Instr <$> mkStackDeallocInstr platform amount -- | An instruction with liveness information. @@ -812,7 +814,7 @@ computeLiveness computeLiveness platform sccs = case checkIsReverseDependent sccs of Nothing -> livenessSCCs platform mapEmpty [] sccs - Just bad -> pprPanic "RegAlloc.Liveness.computeLivenss" + Just bad -> pprPanic "RegAlloc.Liveness.computeLiveness" (vcat [ text "SCCs aren't in reverse dependent order" , text "bad blockId" <+> ppr bad , ppr sccs]) @@ -1006,5 +1008,3 @@ liveness1 platform liveregs blockmap (LiveInstr instr _) r_dying_br = nonDetEltsUniqSet (mkUniqSet r_dying `unionUniqSets` live_branch_only) -- See Note [Unique Determinism and code generation] - - diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs index 0c793173cb..cd008bbbb1 100644 --- a/compiler/nativeGen/RegClass.hs +++ b/compiler/nativeGen/RegClass.hs @@ -4,6 +4,8 @@ module RegClass where +import GhcPrelude + import Outputable import Unique diff --git a/compiler/nativeGen/SPARC/AddrMode.hs b/compiler/nativeGen/SPARC/AddrMode.hs index bf4d480005..ee40843351 100644 --- a/compiler/nativeGen/SPARC/AddrMode.hs +++ b/compiler/nativeGen/SPARC/AddrMode.hs @@ -6,6 +6,8 @@ module SPARC.AddrMode ( where +import GhcPrelude + import SPARC.Imm import SPARC.Base import Reg diff --git a/compiler/nativeGen/SPARC/Base.hs b/compiler/nativeGen/SPARC/Base.hs index a57d5e1c9a..04e8fed2b3 100644 --- a/compiler/nativeGen/SPARC/Base.hs +++ b/compiler/nativeGen/SPARC/Base.hs @@ -18,6 +18,8 @@ module SPARC.Base ( where +import GhcPrelude + import DynFlags import Panic diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 71d320fa63..a95a22274b 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -22,6 +22,8 @@ where #include "../includes/MachDeps.h" -- NCG stuff: +import GhcPrelude + import SPARC.Base import SPARC.CodeGen.Sanity import SPARC.CodeGen.Amode @@ -58,7 +60,6 @@ import FastString import OrdList import Outputable import Platform -import Unique import Control.Monad ( mapAndUnzipM ) @@ -162,7 +163,7 @@ stmtToInstrs stmt = do {- -Now, given a tree (the argument to an CmmLoad) that references memory, +Now, given a tree (the argument to a CmmLoad) that references memory, produce a suitable addressing mode. A Rule of the Game (tm) for Amodes: use of the addr bit must @@ -185,7 +186,7 @@ temporary, then do the other computation, and then use the temporary: jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = mkAsmTempLabel (getUnique blockid) + where blockLabel = blockLbl blockid @@ -313,7 +314,7 @@ genCondJump bid bool = do genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock genSwitch dflags expr targets - | gopt Opt_PIC dflags + | positionIndependent dflags = error "MachCodeGen: sparc genSwitch PIC not finished\n" | otherwise @@ -422,7 +423,10 @@ genCCall target dest_regs args return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) ForeignTarget expr _ - -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr + -> do (dyn_c, dyn_rs) <- arg_to_int_vregs expr + let dyn_r = case dyn_rs of + [dyn_r'] -> dyn_r' + _ -> panic "SPARC.CodeGen.genCCall: arg_to_int" return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) PrimTarget mop @@ -432,7 +436,10 @@ genCCall target dest_regs args return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) Right mopExpr -> do - (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr + (dyn_c, dyn_rs) <- arg_to_int_vregs mopExpr + let dyn_r = case dyn_rs of + [dyn_r'] -> dyn_r' + _ -> panic "SPARC.CodeGen.genCCall: arg_to_int" return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) return lblOrMopExpr @@ -626,6 +633,10 @@ outOfLineMachOp_table mop MO_F32_Cosh -> fsLit "coshf" MO_F32_Tanh -> fsLit "tanhf" + MO_F32_Asinh -> fsLit "asinhf" + MO_F32_Acosh -> fsLit "acoshf" + MO_F32_Atanh -> fsLit "atanhf" + MO_F64_Exp -> fsLit "exp" MO_F64_Log -> fsLit "log" MO_F64_Sqrt -> fsLit "sqrt" @@ -644,14 +655,21 @@ outOfLineMachOp_table mop MO_F64_Cosh -> fsLit "cosh" MO_F64_Tanh -> fsLit "tanh" + MO_F64_Asinh -> fsLit "asinh" + MO_F64_Acosh -> fsLit "acosh" + MO_F64_Atanh -> fsLit "atanh" + MO_UF_Conv w -> fsLit $ word2FloatLabel w MO_Memcpy _ -> fsLit "memcpy" MO_Memset _ -> fsLit "memset" MO_Memmove _ -> fsLit "memmove" + MO_Memcmp _ -> fsLit "memcmp" MO_BSwap w -> fsLit $ bSwapLabel w MO_PopCnt w -> fsLit $ popCntLabel w + MO_Pdep w -> fsLit $ pdepLabel w + MO_Pext w -> fsLit $ pextLabel w MO_Clz w -> fsLit $ clzLabel w MO_Ctz w -> fsLit $ ctzLabel w MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop @@ -663,6 +681,7 @@ outOfLineMachOp_table mop MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported diff --git a/compiler/nativeGen/SPARC/CodeGen/Amode.hs b/compiler/nativeGen/SPARC/CodeGen/Amode.hs index a59287f171..33e3f535da 100644 --- a/compiler/nativeGen/SPARC/CodeGen/Amode.hs +++ b/compiler/nativeGen/SPARC/CodeGen/Amode.hs @@ -4,6 +4,8 @@ module SPARC.CodeGen.Amode ( where +import GhcPrelude + import {-# SOURCE #-} SPARC.CodeGen.Gen32 import SPARC.CodeGen.Base import SPARC.AddrMode diff --git a/compiler/nativeGen/SPARC/CodeGen/Base.hs b/compiler/nativeGen/SPARC/CodeGen/Base.hs index 27b533f46b..039bb6496c 100644 --- a/compiler/nativeGen/SPARC/CodeGen/Base.hs +++ b/compiler/nativeGen/SPARC/CodeGen/Base.hs @@ -13,6 +13,8 @@ module SPARC.CodeGen.Base ( where +import GhcPrelude + import SPARC.Instr import SPARC.Cond import SPARC.AddrMode diff --git a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs b/compiler/nativeGen/SPARC/CodeGen/CondCode.hs index e5fb82df4d..e6b2e174b6 100644 --- a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs +++ b/compiler/nativeGen/SPARC/CodeGen/CondCode.hs @@ -6,6 +6,8 @@ module SPARC.CodeGen.CondCode ( where +import GhcPrelude + import {-# SOURCE #-} SPARC.CodeGen.Gen32 import SPARC.CodeGen.Base import SPARC.Instr diff --git a/compiler/nativeGen/SPARC/CodeGen/Expand.hs b/compiler/nativeGen/SPARC/CodeGen/Expand.hs index 70cb0111c0..0b318740db 100644 --- a/compiler/nativeGen/SPARC/CodeGen/Expand.hs +++ b/compiler/nativeGen/SPARC/CodeGen/Expand.hs @@ -5,6 +5,8 @@ module SPARC.CodeGen.Expand ( where +import GhcPrelude + import SPARC.Instr import SPARC.Imm import SPARC.AddrMode @@ -140,7 +142,7 @@ expandMisalignedDoubles instr --- | The the high partner for this float reg. +-- | The high partner for this float reg. fRegHi :: Reg -> Reg fRegHi (RegReal (RealRegSingle r1)) | r1 >= 32 diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs index a0e86f14c4..a7a1f60416 100644 --- a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs +++ b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs @@ -6,6 +6,8 @@ module SPARC.CodeGen.Gen32 ( where +import GhcPrelude + import SPARC.CodeGen.CondCode import SPARC.CodeGen.Amode import SPARC.CodeGen.Gen64 diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs index f186d437d0..6fa7482f9b 100644 --- a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs +++ b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs @@ -7,6 +7,8 @@ module SPARC.CodeGen.Gen64 ( where +import GhcPrelude + import {-# SOURCE #-} SPARC.CodeGen.Gen32 import SPARC.CodeGen.Base import SPARC.CodeGen.Amode @@ -191,6 +193,24 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) return $ ChildCode64 code r_dst_lo +-- only W32 supported for now +iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) + = do + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + -- compute expr and load it into r_dst_lo + (a_reg, a_code) <- getSomeReg expr + + dflags <- getDynFlags + let platform = targetPlatform dflags + code = a_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 31)) r_dst_hi + , mkRegRegMoveInstr platform a_reg r_dst_lo ] + + return $ ChildCode64 code r_dst_lo + iselExpr64 expr = pprPanic "iselExpr64(sparc)" (ppr expr) diff --git a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs b/compiler/nativeGen/SPARC/CodeGen/Sanity.hs index 81641326f2..fcf5b65bde 100644 --- a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs +++ b/compiler/nativeGen/SPARC/CodeGen/Sanity.hs @@ -6,6 +6,8 @@ module SPARC.CodeGen.Sanity ( where +import GhcPrelude + import SPARC.Instr import SPARC.Ppr () import Instruction diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs index da41457950..3fbfb8603f 100644 --- a/compiler/nativeGen/SPARC/Cond.hs +++ b/compiler/nativeGen/SPARC/Cond.hs @@ -7,6 +7,8 @@ module SPARC.Cond ( where +import GhcPrelude + -- | Branch condition codes. data Cond = ALWAYS diff --git a/compiler/nativeGen/SPARC/Imm.hs b/compiler/nativeGen/SPARC/Imm.hs index cb53ba411c..bd2d4ab131 100644 --- a/compiler/nativeGen/SPARC/Imm.hs +++ b/compiler/nativeGen/SPARC/Imm.hs @@ -7,6 +7,8 @@ module SPARC.Imm ( where +import GhcPrelude + import Cmm import CLabel @@ -57,7 +59,7 @@ litToImm lit CmmLabel l -> ImmCLbl l CmmLabelOff l off -> ImmIndex l off - CmmLabelDiffOff l1 l2 off + CmmLabelDiffOff l1 l2 off _ -> ImmConstantSum (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) (ImmInt off) diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs index 4c19ac69a7..54fb513478 100644 --- a/compiler/nativeGen/SPARC/Instr.hs +++ b/compiler/nativeGen/SPARC/Instr.hs @@ -25,6 +25,8 @@ module SPARC.Instr ( where +import GhcPrelude + import SPARC.Stack import SPARC.Imm import SPARC.AddrMode diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs index 88b04b952a..eb401fff06 100644 --- a/compiler/nativeGen/SPARC/Ppr.hs +++ b/compiler/nativeGen/SPARC/Ppr.hs @@ -25,6 +25,8 @@ where #include "HsVersions.h" #include "nativeGen/NCG.h" +import GhcPrelude + import SPARC.Regs import SPARC.Instr import SPARC.Cond @@ -38,11 +40,12 @@ import PprBase import Cmm hiding (topInfoTable) import PprCmm() +import BlockId import CLabel import Hoopl.Label import Hoopl.Collections -import Unique ( Uniquable(..), pprUniqueAlways ) +import Unique ( pprUniqueAlways ) import Outputable import Platform import FastString @@ -91,7 +94,7 @@ dspSection = Section Text $ pprBasicBlock :: LabelMap CmmStatics -> NatBasicBlock Instr -> SDoc pprBasicBlock info_env (BasicBlock blockid instrs) = maybe_infotable $$ - pprLabel (mkAsmTempLabel (getUnique blockid)) $$ + pprLabel (blockLbl blockid) $$ vcat (map pprInstr instrs) where maybe_infotable = case mapLookup blockid info_env of @@ -402,7 +405,7 @@ pprInstr (LD format addr reg) pprReg reg ] --- 64 bit FP storees are expanded into individual instructions in CodeGen.Expand +-- 64 bit FP stores are expanded into individual instructions in CodeGen.Expand pprInstr (ST FF64 reg _) | RegReal (RealRegSingle{}) <- reg = panic "SPARC.Ppr: not emitting potentially misaligned ST FF64 instr" @@ -541,7 +544,7 @@ pprInstr (BI cond b blockid) text "\tb", pprCond cond, if b then pp_comma_a else empty, char '\t', - ppr (mkAsmTempLabel (getUnique blockid)) + ppr (blockLbl blockid) ] pprInstr (BF cond b blockid) @@ -549,7 +552,7 @@ pprInstr (BF cond b blockid) text "\tfb", pprCond cond, if b then pp_comma_a else empty, char '\t', - ppr (mkAsmTempLabel (getUnique blockid)) + ppr (blockLbl blockid) ] pprInstr (JMP addr) = text "\tjmp\t" <> pprAddr addr diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs index 14a5192c2d..d6aadbae94 100644 --- a/compiler/nativeGen/SPARC/Regs.hs +++ b/compiler/nativeGen/SPARC/Regs.hs @@ -32,6 +32,8 @@ module SPARC.Regs ( where +import GhcPrelude + import CodeGen.Platform.SPARC import Reg import RegClass @@ -75,7 +77,7 @@ classOfRealReg reg -- | regSqueeze_class reg --- Calculuate the maximum number of register colors that could be +-- Calculate the maximum number of register colors that could be -- denied to a node of this class due to having this reg -- as a neighbour. -- @@ -191,7 +193,7 @@ fPair reg -- | All the regs that the register allocator can allocate to, --- with the the fixed use regs removed. +-- with the fixed use regs removed. -- allocatableRegs :: [RealReg] allocatableRegs diff --git a/compiler/nativeGen/SPARC/ShortcutJump.hs b/compiler/nativeGen/SPARC/ShortcutJump.hs index 123a345130..83e366cb04 100644 --- a/compiler/nativeGen/SPARC/ShortcutJump.hs +++ b/compiler/nativeGen/SPARC/ShortcutJump.hs @@ -8,6 +8,8 @@ module SPARC.ShortcutJump ( where +import GhcPrelude + import SPARC.Instr import SPARC.Imm @@ -16,8 +18,6 @@ import BlockId import Cmm import Panic -import Unique - data JumpDest @@ -46,14 +46,14 @@ shortcutStatics fn (Statics lbl statics) shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel shortcutLabel fn lab - | Just uq <- maybeAsmTemp lab = shortBlockId fn (mkBlockId uq) - | otherwise = lab + | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId + | otherwise = lab shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic shortcutStatic fn (CmmStaticLit (CmmLabel lab)) = CmmStaticLit (CmmLabel (shortcutLabel fn lab)) -shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) - = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w)) + = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w) -- slightly dodgy, we're ignoring the second label, but this -- works with the way we use CmmLabelDiffOff for jump tables now. shortcutStatic _ other_static @@ -63,7 +63,7 @@ shortcutStatic _ other_static shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel shortBlockId fn blockid = case fn blockid of - Nothing -> mkAsmTempLabel (getUnique blockid) + Nothing -> blockLbl blockid Just (DestBlockId blockid') -> shortBlockId fn blockid' Just (DestImm (ImmCLbl lbl)) -> lbl _other -> panic "shortBlockId" diff --git a/compiler/nativeGen/SPARC/Stack.hs b/compiler/nativeGen/SPARC/Stack.hs index 629b18789f..3f5b2a7289 100644 --- a/compiler/nativeGen/SPARC/Stack.hs +++ b/compiler/nativeGen/SPARC/Stack.hs @@ -7,6 +7,8 @@ module SPARC.Stack ( where +import GhcPrelude + import SPARC.AddrMode import SPARC.Regs import SPARC.Base diff --git a/compiler/nativeGen/TargetReg.hs b/compiler/nativeGen/TargetReg.hs index a298cccaf6..6800b9043b 100644 --- a/compiler/nativeGen/TargetReg.hs +++ b/compiler/nativeGen/TargetReg.hs @@ -21,6 +21,8 @@ where #include "HsVersions.h" +import GhcPrelude + import Reg import RegClass import Format diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 341fa43dbc..a2e26bd68b 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -2,9 +2,7 @@ -- The default iteration limit is a bit too low for the definitions -- in this module. -#if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-} -#endif ----------------------------------------------------------------------------- -- @@ -32,6 +30,8 @@ where #include "../includes/MachDeps.h" -- NCG stuff: +import GhcPrelude + import X86.Instr import X86.Cond import X86.Regs @@ -65,7 +65,6 @@ import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol ) import ForeignCall ( CCallConv(..) ) import OrdList import Outputable -import Unique import FastString import DynFlags import Util @@ -211,6 +210,9 @@ stmtToInstrs stmt = do -> genCCall dflags is32Bit target result_regs args CmmBranch id -> genBranch id + + --We try to arrange blocks such that the likely branch is the fallthrough + --in CmmContFlowOpt. So we can assume the condition is likely false here. CmmCondBranch arg true false _ -> do b1 <- genCondJump true arg b2 <- genBranch false @@ -295,7 +297,7 @@ data Amode = Amode AddrMode InstrBlock {- -Now, given a tree (the argument to an CmmLoad) that references memory, +Now, given a tree (the argument to a CmmLoad) that references memory, produce a suitable addressing mode. A Rule of the Game (tm) for Amodes: use of the addr bit must @@ -328,7 +330,7 @@ is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = mkAsmTempLabel (getUnique blockid) + where blockLabel = blockLbl blockid -- ----------------------------------------------------------------------------- @@ -466,6 +468,20 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do r_dst_lo ) +iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do + fn <- getAnyReg expr + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + code = fn r_dst_lo + return ( + ChildCode64 (code `snocOL` + MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL` + CLTD II32 `snocOL` + MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL` + MOV II32 (OpReg edx) (OpReg r_dst_hi)) + r_dst_lo + ) + iselExpr64 expr = pprPanic "iselExpr64(i386)" (ppr expr) @@ -503,6 +519,9 @@ getRegister' dflags is32Bit (CmmReg reg) getRegister' dflags is32Bit (CmmRegOff r n) = getRegister' dflags is32Bit $ mangleIndexTree dflags r n +getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e]) + = addAlignmentCheck align <$> getRegister' dflags is32Bit e + -- for 32-bit architectuers, support some 64 -> 32 bit conversions: -- TO_W_(x), TO_W_(x >> 32) @@ -731,8 +750,10 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_F_Ne _ -> condFltReg is32Bit NE x y MO_F_Gt _ -> condFltReg is32Bit GTT x y MO_F_Ge _ -> condFltReg is32Bit GE x y - MO_F_Lt _ -> condFltReg is32Bit LTT x y - MO_F_Le _ -> condFltReg is32Bit LE x y + -- Invert comparison condition and swap operands + -- See Note [SSE Parity Checks] + MO_F_Lt _ -> condFltReg is32Bit GTT y x + MO_F_Le _ -> condFltReg is32Bit GE y x MO_Eq _ -> condIntReg EQQ x y MO_Ne _ -> condIntReg NE x y @@ -1255,6 +1276,21 @@ isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit || isSuitableFloatingPointLit lit isOperand _ _ = False +-- | Given a 'Register', produce a new 'Register' with an instruction block +-- which will check the value for alignment. Used for @-falignment-sanitisation@. +addAlignmentCheck :: Int -> Register -> Register +addAlignmentCheck align reg = + case reg of + Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt reg) + Any fmt f -> Any fmt (\reg -> f reg `appOL` check fmt reg) + where + check :: Format -> Reg -> InstrBlock + check fmt reg = + ASSERT(not $ isFloatFormat fmt) + toOL [ TEST fmt (OpImm $ ImmInt $ align-1) (OpReg reg) + , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel + ] + memConstant :: Int -> CmmLit -> NatM Amode memConstant align lit = do lbl <- getNewLabelNat @@ -1331,15 +1367,17 @@ getCondCode (CmmMachOp mop [x, y]) MO_F_Ne W32 -> condFltCode NE x y MO_F_Gt W32 -> condFltCode GTT x y MO_F_Ge W32 -> condFltCode GE x y - MO_F_Lt W32 -> condFltCode LTT x y - MO_F_Le W32 -> condFltCode LE x y + -- Invert comparison condition and swap operands + -- See Note [SSE Parity Checks] + MO_F_Lt W32 -> condFltCode GTT y x + MO_F_Le W32 -> condFltCode GE y x MO_F_Eq W64 -> condFltCode EQQ x y MO_F_Ne W64 -> condFltCode NE x y MO_F_Gt W64 -> condFltCode GTT x y MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode LTT x y - MO_F_Le W64 -> condFltCode LE x y + MO_F_Lt W64 -> condFltCode GTT y x + MO_F_Le W64 -> condFltCode GE y x _ -> condIntCode (machOpToCond mop) x y @@ -1639,11 +1677,19 @@ genCondJump' _ id bool = do else do lbl <- getBlockIdNat - -- see comment with condFltReg + -- See Note [SSE Parity Checks] let code = case cond of NE -> or_unordered GU -> plain_test GEU -> plain_test + -- Use ASSERT so we don't break releases if + -- LTT/LE creep in somehow. + LTT -> + ASSERT2(False, ppr "Should have been turned into >") + and_ordered + LE -> + ASSERT2(False, ppr "Should have been turned into >=") + and_ordered _ -> and_ordered plain_test = unitOL ( @@ -1855,6 +1901,72 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst] format = intFormat width lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width)) +genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst] + args@[src, mask] = do + let platform = targetPlatform dflags + if isBmi2Enabled dflags + then do code_src <- getAnyReg src + code_mask <- getAnyReg mask + src_r <- getNewRegNat format + mask_r <- getNewRegNat format + let dst_r = getRegisterReg platform False (CmmLocal dst) + return $ code_src src_r `appOL` code_mask mask_r `appOL` + (if width == W8 then + -- The PDEP instruction doesn't take a r/m8 + unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL` + unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL` + unitOL (PDEP II16 (OpReg mask_r) (OpReg src_r ) dst_r) + else + unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL` + (if width == W8 || width == W16 then + -- We used a 16-bit destination register above, + -- so zero-extend + unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r)) + else nilOL) + else do + targetExpr <- cmmMakeDynamicReference dflags + CallReference lbl + let target = ForeignTarget targetExpr (ForeignConvention CCallConv + [NoHint] [NoHint] + CmmMayReturn) + genCCall dflags is32Bit target dest_regs args + where + format = intFormat width + lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width)) + +genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst] + args@[src, mask] = do + let platform = targetPlatform dflags + if isBmi2Enabled dflags + then do code_src <- getAnyReg src + code_mask <- getAnyReg mask + src_r <- getNewRegNat format + mask_r <- getNewRegNat format + let dst_r = getRegisterReg platform False (CmmLocal dst) + return $ code_src src_r `appOL` code_mask mask_r `appOL` + (if width == W8 then + -- The PEXT instruction doesn't take a r/m8 + unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL` + unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL` + unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r) + else + unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL` + (if width == W8 || width == W16 then + -- We used a 16-bit destination register above, + -- so zero-extend + unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r)) + else nilOL) + else do + targetExpr <- cmmMakeDynamicReference dflags + CallReference lbl + let target = ForeignTarget targetExpr (ForeignConvention CCallConv + [NoHint] [NoHint] + CmmMayReturn) + genCCall dflags is32Bit target dest_regs args + where + format = intFormat width + lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width)) + genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] | is32Bit && width == W64 = do -- Fallback to `hs_clz64` on i386 @@ -2129,6 +2241,8 @@ genCCall _ is32Bit target dest_regs args = do ADC format (OpImm (ImmInteger 0)) (OpReg reg_h) return code _ -> panic "genCCall: Wrong number of arguments/results for add2" + (PrimTarget (MO_AddWordC width), [res_r, res_c]) -> + addSubIntC platform ADD_CC (const Nothing) CARRY width res_r res_c args (PrimTarget (MO_SubWordC width), [res_r, res_c]) -> addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args (PrimTarget (MO_AddIntC width), [res_r, res_c]) -> @@ -2645,6 +2759,10 @@ outOfLineCmmOp mop res args MO_F32_Tanh -> fsLit "tanhf" MO_F32_Pwr -> fsLit "powf" + MO_F32_Asinh -> fsLit "asinhf" + MO_F32_Acosh -> fsLit "acoshf" + MO_F32_Atanh -> fsLit "atanhf" + MO_F64_Sqrt -> fsLit "sqrt" MO_F64_Fabs -> fsLit "fabs" MO_F64_Sin -> fsLit "sin" @@ -2662,15 +2780,23 @@ outOfLineCmmOp mop res args MO_F64_Tanh -> fsLit "tanh" MO_F64_Pwr -> fsLit "pow" + MO_F64_Asinh -> fsLit "asinh" + MO_F64_Acosh -> fsLit "acosh" + MO_F64_Atanh -> fsLit "atanh" + MO_Memcpy _ -> fsLit "memcpy" MO_Memset _ -> fsLit "memset" MO_Memmove _ -> fsLit "memmove" + MO_Memcmp _ -> fsLit "memcmp" MO_PopCnt _ -> fsLit "popcnt" MO_BSwap _ -> fsLit "bswap" MO_Clz w -> fsLit $ clzLabel w MO_Ctz _ -> unsupported + MO_Pdep w -> fsLit $ pdepLabel w + MO_Pext w -> fsLit $ pextLabel w + MO_AtomicRMW _ _ -> fsLit "atomicrmw" MO_AtomicRead _ -> fsLit "atomicread" MO_AtomicWrite _ -> fsLit "atomicwrite" @@ -2684,6 +2810,7 @@ outOfLineCmmOp mop res args MO_Add2 {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported @@ -2698,7 +2825,7 @@ outOfLineCmmOp mop res args genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock genSwitch dflags expr targets - | gopt Opt_PIC dflags + | positionIndependent dflags = do (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset) -- getNonClobberedReg because it needs to survive across t_code @@ -2750,23 +2877,29 @@ genSwitch dflags expr targets JMP_TBL op ids (Section ReadOnlyData lbl) lbl ] return code - where (offset, ids) = switchTargetsToTable targets + where + (offset, blockIds) = switchTargetsToTable targets + ids = map (fmap DestBlockId) blockIds generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr) generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl) - = Just (createJumpTable dflags ids section lbl) + = let getBlockId (DestBlockId id) = id + getBlockId _ = panic "Non-Label target in Jump Table" + blockIds = map (fmap getBlockId) ids + in Just (createJumpTable dflags blockIds section lbl) generateJumpTableForInstr _ _ = Nothing createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel -> GenCmmDecl (Alignment, CmmStatics) h g createJumpTable dflags ids section lbl = let jumpTable - | gopt Opt_PIC dflags = - let jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 (wordWidth dflags)) + | positionIndependent dflags = + let ww = wordWidth dflags + jumpTableEntryRel Nothing + = CmmStaticLit (CmmInt 0 ww) jumpTableEntryRel (Just blockid) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) - where blockLabel = mkAsmTempLabel (getUnique blockid) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww) + where blockLabel = blockLbl blockid in map jumpTableEntryRel ids | otherwise = map (jumpTableEntry dflags) ids in CmmData section (1, Statics lbl jumpTable) @@ -2797,6 +2930,59 @@ condIntReg cond x y = do return (Any II32 code) +----------------------------------------------------------- +--- Note [SSE Parity Checks] --- +----------------------------------------------------------- + +-- We have to worry about unordered operands (eg. comparisons +-- against NaN). If the operands are unordered, the comparison +-- sets the parity flag, carry flag and zero flag. +-- All comparisons are supposed to return false for unordered +-- operands except for !=, which returns true. +-- +-- Optimisation: we don't have to test the parity flag if we +-- know the test has already excluded the unordered case: eg > +-- and >= test for a zero carry flag, which can only occur for +-- ordered operands. +-- +-- By reversing comparisons we can avoid testing the parity +-- for < and <= as well. If any of the arguments is an NaN we +-- return false either way. If both arguments are valid then +-- x <= y <-> y >= x holds. So it's safe to swap these. +-- +-- We invert the condition inside getRegister'and getCondCode +-- which should cover all invertable cases. +-- All other functions translating FP comparisons to assembly +-- use these to two generate the comparison code. +-- +-- As an example consider a simple check: +-- +-- func :: Float -> Float -> Int +-- func x y = if x < y then 1 else 0 +-- +-- Which in Cmm gives the floating point comparison. +-- +-- if (%MO_F_Lt_W32(F1, F2)) goto c2gg; else goto c2gf; +-- +-- We used to compile this to an assembly code block like this: +-- _c2gh: +-- ucomiss %xmm2,%xmm1 +-- jp _c2gf +-- jb _c2gg +-- jmp _c2gf +-- +-- Where we have to introduce an explicit +-- check for unordered results (using jmp parity): +-- +-- We can avoid this by exchanging the arguments and inverting the direction +-- of the comparison. This results in the sequence of: +-- +-- ucomiss %xmm1,%xmm2 +-- ja _c2g2 +-- jmp _c2g1 +-- +-- Removing the jump reduces the pressure on the branch predidiction system +-- and plays better with the uOP cache. condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87 @@ -2815,27 +3001,18 @@ condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87 CondCode _ cond cond_code <- condFltCode cond x y tmp1 <- getNewRegNat (archWordFormat is32Bit) tmp2 <- getNewRegNat (archWordFormat is32Bit) - let - -- We have to worry about unordered operands (eg. comparisons - -- against NaN). If the operands are unordered, the comparison - -- sets the parity flag, carry flag and zero flag. - -- All comparisons are supposed to return false for unordered - -- operands except for !=, which returns true. - -- - -- Optimisation: we don't have to test the parity flag if we - -- know the test has already excluded the unordered case: eg > - -- and >= test for a zero carry flag, which can only occur for - -- ordered operands. - -- - -- ToDo: by reversing comparisons we could avoid testing the - -- parity flag in more cases. - + let -- See Note [SSE Parity Checks] code dst = cond_code `appOL` (case cond of NE -> or_unordered dst GU -> plain_test dst GEU -> plain_test dst + -- Use ASSERT so we don't break releases if these creep in. + LTT -> ASSERT2(False, ppr "Should have been turned into >") + and_ordered dst + LE -> ASSERT2(False, ppr "Should have been turned into >=") + and_ordered dst _ -> and_ordered dst) plain_test dst = toOL [ diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs index 586dabd8f4..35cbf943e1 100644 --- a/compiler/nativeGen/X86/Cond.hs +++ b/compiler/nativeGen/X86/Cond.hs @@ -8,6 +8,8 @@ module X86.Cond ( where +import GhcPrelude + data Cond = ALWAYS -- What's really used? ToDo | EQQ diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 71f50e9d2a..c7000c9f4b 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -8,7 +8,7 @@ -- ----------------------------------------------------------------------------- -module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest, +module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..), getJumpDestBlockId, canShortcut, shortcutStatics, shortcutJump, i386_insert_ffrees, allocMoreStack, maxSpillSlots, archWordFormat) @@ -17,6 +17,8 @@ where #include "HsVersions.h" #include "nativeGen/NCG.h" +import GhcPrelude + import X86.Cond import X86.Regs import Instruction @@ -320,7 +322,7 @@ data Instr | JXX_GBL Cond Imm -- non-local version of JXX -- Table jump | JMP_TBL Operand -- Address to jump to - [Maybe BlockId] -- Blocks in the jump table + [Maybe JumpDest] -- Targets of the jump table Section -- Data section jump table should be put in CLabel -- Label of jump table | CALL (Either Imm Reg) [Reg] @@ -343,6 +345,10 @@ data Instr | BSF Format Operand Reg -- bit scan forward | BSR Format Operand Reg -- bit scan reverse + -- bit manipulation instructions + | PDEP Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask + | PEXT Format Operand Operand Reg -- [BMI2] extract bits from the specified mask + -- prefetch | PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch -- variant can be NTA, Lvl0, Lvl1, or Lvl2 @@ -462,6 +468,9 @@ x86_regUsageOfInstr platform instr BSF _ src dst -> mkRU (use_R src []) [dst] BSR _ src dst -> mkRU (use_R src []) [dst] + PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst] + PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst] + -- note: might be a better way to do this PREFETCH _ _ src -> mkRU (use_R src []) [] LOCK i -> x86_regUsageOfInstr platform i @@ -638,6 +647,8 @@ x86_patchRegsOfInstr instr env CLTD _ -> instr POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst) + PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst) + PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst) BSF fmt src dst -> BSF fmt (patchOp src) (env dst) BSR fmt src dst -> BSR fmt (patchOp src) (env dst) @@ -693,7 +704,7 @@ x86_jumpDestsOfInstr x86_jumpDestsOfInstr insn = case insn of JXX _ id -> [id] - JMP_TBL _ ids _ _ -> [id | Just id <- ids] + JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids] _ -> [] @@ -704,8 +715,12 @@ x86_patchJumpInstr insn patchF = case insn of JXX cc id -> JXX cc (patchF id) JMP_TBL op ids section lbl - -> JMP_TBL op (map (fmap patchF) ids) section lbl + -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl _ -> insn + where + patchJumpDest f (DestBlockId id) = DestBlockId (f id) + patchJumpDest _ dest = dest + @@ -843,25 +858,104 @@ x86_mkJumpInstr x86_mkJumpInstr id = [JXX ALWAYS id] +-- Note [Windows stack layout] +-- | On most OSes the kernel will place a guard page after the current stack +-- page. If you allocate larger than a page worth you may jump over this +-- guard page. Not only is this a security issue, but on certain OSes such +-- as Windows a new page won't be allocated if you don't hit the guard. This +-- will cause a segfault or access fault. +-- +-- This function defines if the current allocation amount requires a probe. +-- On Windows (for now) we emit a call to _chkstk for this. For other OSes +-- this is not yet implemented. +-- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk +-- The Windows stack looks like this: +-- +-- +-------------------+ +-- | SP | +-- +-------------------+ +-- | | +-- | GUARD PAGE | +-- | | +-- +-------------------+ +-- | | +-- | | +-- | UNMAPPED | +-- | | +-- | | +-- +-------------------+ +-- +-- In essense each allocation larger than a page size needs to be chunked and +-- a probe emitted after each page allocation. You have to hit the guard +-- page so the kernel can map in the next page, otherwise you'll segfault. +-- +needs_probe_call :: Platform -> Int -> Bool +needs_probe_call platform amount + = case platformOS platform of + OSMinGW32 -> case platformArch platform of + ArchX86 -> amount > (4 * 1024) + ArchX86_64 -> amount > (8 * 1024) + _ -> False + _ -> False x86_mkStackAllocInstr :: Platform -> Int - -> Instr + -> [Instr] x86_mkStackAllocInstr platform amount - = case platformArch platform of - ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp) - ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) - _ -> panic "x86_mkStackAllocInstr" + = case platformOS platform of + OSMinGW32 -> + -- These will clobber AX but this should be ok because + -- + -- 1. It is the first thing we do when entering the closure and AX is + -- a caller saved registers on Windows both on x86_64 and x86. + -- + -- 2. The closures are only entered via a call or longjmp in which case + -- there are no expectations for volatile registers. + -- + -- 3. When the target is a local branch point it is re-targeted + -- after the dealloc, preserving #2. See note [extra spill slots]. + -- + -- We emit a call because the stack probes are quite involved and + -- would bloat code size a lot. GHC doesn't really have an -Os. + -- __chkstk is guaranteed to leave all nonvolatile registers and AX + -- untouched. It's part of the standard prologue code for any Windows + -- function dropping the stack more than a page. + -- See Note [Windows stack layout] + case platformArch platform of + ArchX86 | needs_probe_call platform amount -> + [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax) + , CALL (Left $ strImmLit "___chkstk_ms") [eax] + , SUB II32 (OpReg eax) (OpReg esp) + ] + | otherwise -> + [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) + , TEST II32 (OpReg esp) (OpReg esp) + ] + ArchX86_64 | needs_probe_call platform amount -> + [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax) + , CALL (Left $ strImmLit "__chkstk_ms") [rax] + , SUB II64 (OpReg rax) (OpReg rsp) + ] + | otherwise -> + [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) + , TEST II64 (OpReg rsp) (OpReg rsp) + ] + _ -> panic "x86_mkStackAllocInstr" + _ -> + case platformArch platform of + ArchX86 -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ] + ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ] + _ -> panic "x86_mkStackAllocInstr" x86_mkStackDeallocInstr :: Platform -> Int - -> Instr + -> [Instr] x86_mkStackDeallocInstr platform amount = case platformArch platform of - ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp) - ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp) + ArchX86 -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)] + ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)] _ -> panic "x86_mkStackDeallocInstr" i386_insert_ffrees @@ -981,7 +1075,7 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do insert_stack_insns (BasicBlock id insns) | Just new_blockid <- mapLookup id new_blockmap - = [ BasicBlock id [alloc, JXX ALWAYS new_blockid] + = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid] , BasicBlock new_blockid block' ] | otherwise = [ BasicBlock id block' ] @@ -989,7 +1083,7 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do block' = foldr insert_dealloc [] insns insert_dealloc insn r = case insn of - JMP _ _ -> dealloc : insn : r + JMP _ _ -> dealloc ++ (insn : r) JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL" _other -> x86_patchJumpInstr insn retarget : r where retarget b = fromMaybe b (mapLookup b new_blockmap) @@ -998,7 +1092,6 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do -- in return (CmmProc info lbl live (ListGraph new_code)) - data JumpDest = DestBlockId BlockId | DestImm Imm getJumpDestBlockId :: JumpDest -> Maybe BlockId @@ -1015,14 +1108,24 @@ canShortcut _ = Nothing -- The blockset helps avoid following cycles. shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn - where shortcutJump' fn seen insn@(JXX cc id) = - if setMember id seen then insn - else case fn id of - Nothing -> insn - Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id') - Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm) - where seen' = setInsert id seen - shortcutJump' _ _ other = other + where + shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr + shortcutJump' fn seen insn@(JXX cc id) = + if setMember id seen then insn + else case fn id of + Nothing -> insn + Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id') + Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm) + where seen' = setInsert id seen + shortcutJump' fn _ (JMP_TBL addr blocks section tblId) = + let updateBlock (Just (DestBlockId bid)) = + case fn bid of + Nothing -> Just (DestBlockId bid ) + Just dest -> Just dest + updateBlock dest = dest + blocks' = map updateBlock blocks + in JMP_TBL addr blocks' section tblId + shortcutJump' _ _ other = other -- Here because it knows about JumpDest shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics) @@ -1033,14 +1136,14 @@ shortcutStatics fn (align, Statics lbl statics) shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel shortcutLabel fn lab - | Just uq <- maybeAsmTemp lab = shortBlockId fn emptyUniqSet (mkBlockId uq) - | otherwise = lab + | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId + | otherwise = lab shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic shortcutStatic fn (CmmStaticLit (CmmLabel lab)) = CmmStaticLit (CmmLabel (shortcutLabel fn lab)) -shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) - = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w)) + = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w) -- slightly dodgy, we're ignoring the second label, but this -- works with the way we use CmmLabelDiffOff for jump tables now. shortcutStatic _ other_static @@ -1054,8 +1157,8 @@ shortBlockId shortBlockId fn seen blockid = case (elementOfUniqSet uq seen, fn blockid) of - (True, _) -> mkAsmTempLabel uq - (_, Nothing) -> mkAsmTempLabel uq + (True, _) -> blockLbl blockid + (_, Nothing) -> blockLbl blockid (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid' (_, Just (DestImm (ImmCLbl lbl))) -> lbl (_, _other) -> panic "shortBlockId" diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index fce432a3dc..03d4fce794 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -23,6 +23,8 @@ where #include "HsVersions.h" #include "nativeGen/NCG.h" +import GhcPrelude + import X86.Regs import X86.Instr import X86.Cond @@ -37,8 +39,9 @@ import Hoopl.Label import BasicTypes (Alignment) import DynFlags import Cmm hiding (topInfoTable) +import BlockId import CLabel -import Unique ( pprUniqueAlways, Uniquable(..) ) +import Unique ( pprUniqueAlways ) import Platform import FastString import Outputable @@ -70,12 +73,17 @@ import Data.Bits -- .subsections_via_symbols and -dead_strip can be found at -- <https://developer.apple.com/library/mac/documentation/DeveloperTools/Reference/Assembler/040-Assembler_Directives/asm_directives.html#//apple_ref/doc/uid/TP30000823-TPXREF101> +pprProcAlignment :: SDoc +pprProcAlignment = sdocWithDynFlags $ \dflags -> + (maybe empty pprAlign . cmmProcAlignment $ dflags) + pprNatCmmDecl :: NatCmmDecl (Alignment, CmmStatics) Instr -> SDoc pprNatCmmDecl (CmmData section dats) = pprSectionAlign section $$ pprDatas dats pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = sdocWithDynFlags $ \dflags -> + pprProcAlignment $$ case topInfoTable proc of Nothing -> case blocks of @@ -83,6 +91,7 @@ pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = pprLabel lbl blocks -> -- special case for code without info table: pprSectionAlign (Section Text lbl) $$ + pprProcAlignment $$ pprLabel lbl $$ -- blocks guaranteed not null, so label needed vcat (map (pprBasicBlock top_info) blocks) $$ (if debugLevel dflags > 0 @@ -92,6 +101,7 @@ pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = Just (Statics info_lbl _) -> sdocWithPlatform $ \platform -> pprSectionAlign (Section Text info_lbl) $$ + pprProcAlignment $$ (if platformHasSubsectionsViaSymbols platform then ppr (mkDeadStripPreventer info_lbl) <> char ':' else empty) $$ @@ -126,7 +136,7 @@ pprBasicBlock info_env (BasicBlock blockid instrs) (if debugLevel dflags > 0 then ppr (mkAsmTempEndLabel asmLbl) <> char ':' else empty) where - asmLbl = mkAsmTempLabel (getUnique blockid) + asmLbl = blockLbl blockid maybe_infotable = case mapLookup blockid info_env of Nothing -> empty Just (Statics info_lbl info) -> @@ -160,35 +170,116 @@ pprGloblDecl lbl | not (externallyVisibleCLabel lbl) = empty | otherwise = text ".globl " <> ppr lbl -pprTypeAndSizeDecl :: CLabel -> SDoc -pprTypeAndSizeDecl lbl +pprLabelType' :: DynFlags -> CLabel -> SDoc +pprLabelType' dflags lbl = + if isCFunctionLabel lbl || functionOkInfoTable then + text "@function" + else + text "@object" + where + {- + NOTE: This is a bit hacky. + + With the `tablesNextToCode` info tables look like this: + ``` + <info table data> + label_info: + <info table code> + ``` + So actually info table label points exactly to the code and we can mark + the label as @function. (This is required to make perf and potentially other + tools to work on Haskell binaries). + This usually works well but it can cause issues with a linker. + A linker uses different algorithms for the relocation depending on + the symbol type.For some reason, a linker will generate JUMP_SLOT relocation + when constructor info table is referenced from a data section. + This only happens with static constructor call so + we mark _con_info symbols as `@object` to avoid the issue with relocations. + + @SimonMarlow hack explanation: + "The reasoning goes like this: + + * The danger when we mark a symbol as `@function` is that the linker will + redirect it to point to the PLT and use a `JUMP_SLOT` relocation when + the symbol refers to something outside the current shared object. + A PLT / JUMP_SLOT reference only works for symbols that we jump to, not + for symbols representing data,, nor for info table symbol references which + we expect to point directly to the info table. + * GHC generates code that might refer to any info table symbol from the text + segment, but that's OK, because those will be explicit GOT references + generated by the code generator. + * When we refer to info tables from the data segment, it's either + * a FUN_STATIC/THUNK_STATIC local to this module + * a `con_info` that could be from anywhere + + So, the only info table symbols that we might refer to from the data segment + of another shared object are `con_info` symbols, so those are the ones we + need to exclude from getting the @function treatment. + " + + A good place to check for more + https://ghc.haskell.org/trac/ghc/wiki/Commentary/PositionIndependentCode + + Another possible hack is to create an extra local function symbol for + every code-like thing to give the needed information for to the tools + but mess up with the relocation. https://phabricator.haskell.org/D4730 + -} + functionOkInfoTable = tablesNextToCode dflags && + isInfoTableLabel lbl && not (isConInfoTableLabel lbl) + + +pprTypeDecl :: CLabel -> SDoc +pprTypeDecl lbl = sdocWithPlatform $ \platform -> if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl - then text ".type " <> ppr lbl <> ptext (sLit ", @object") + then + sdocWithDynFlags $ \df -> + text ".type " <> ppr lbl <> ptext (sLit ", ") <> pprLabelType' df lbl else empty pprLabel :: CLabel -> SDoc pprLabel lbl = pprGloblDecl lbl - $$ pprTypeAndSizeDecl lbl + $$ pprTypeDecl lbl $$ (ppr lbl <> char ':') +{- +Note [Pretty print ASCII when AsmCodeGen] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Previously, when generating assembly code, we created SDoc with +`(ptext . sLit)` for every bytes in literal bytestring, then +combine them using `hcat`. + +When handling literal bytestrings with millions of bytes, +millions of SDoc would be created and to combine, leading to +high memory usage. + +Now we escape the given bytestring to string directly and construct +SDoc only once. This improvement could dramatically decrease the +memory allocation from 4.7GB to 1.3GB when embedding a 3MB literal +string in source code. See Trac #14741 for profiling results. +-} pprASCII :: [Word8] -> SDoc pprASCII str - = hcat (map (do1 . fromIntegral) str) + -- Transform this given literal bytestring to escaped string and construct + -- the literal SDoc directly. + -- See Trac #14741 + -- and Note [Pretty print ASCII when AsmCodeGen] + = ptext $ sLit $ foldr (\w s -> (do1 . fromIntegral) w ++ s) "" str where - do1 :: Int -> SDoc - do1 w | '\t' <- chr w = ptext (sLit "\\t") - do1 w | '\n' <- chr w = ptext (sLit "\\n") - do1 w | '"' <- chr w = ptext (sLit "\\\"") - do1 w | '\\' <- chr w = ptext (sLit "\\\\") - do1 w | isPrint (chr w) = char (chr w) - do1 w | otherwise = char '\\' <> octal w - - octal :: Int -> SDoc - octal w = int ((w `div` 64) `mod` 8) - <> int ((w `div` 8) `mod` 8) - <> int (w `mod` 8) + do1 :: Int -> String + do1 w | '\t' <- chr w = "\\t" + | '\n' <- chr w = "\\n" + | '"' <- chr w = "\\\"" + | '\\' <- chr w = "\\\\" + | isPrint (chr w) = [chr w] + | otherwise = '\\' : octal w + + octal :: Int -> String + octal w = [ chr (ord '0' + (w `div` 64) `mod` 8) + , chr (ord '0' + (w `div` 8) `mod` 8) + , chr (ord '0' + w `mod` 8) + ] pprAlign :: Int -> SDoc pprAlign bytes @@ -505,7 +596,7 @@ pprDataItem' dflags lit -- case lit of -- A relative relocation: - CmmLabelDiffOff _ _ _ -> + CmmLabelDiffOff _ _ _ _ -> [text "\t.long\t" <> pprImm imm, text "\t.long\t0"] _ -> @@ -516,7 +607,7 @@ pprDataItem' dflags lit asmComment :: SDoc -> SDoc -asmComment c = ifPprDebug $ text "# " <> c +asmComment c = whenPprDebug $ text "# " <> c pprInstr :: Instr -> SDoc @@ -645,6 +736,9 @@ pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst pprInstr (BSF format src dst) = pprOpOp (sLit "bsf") format src (OpReg dst) pprInstr (BSR format src dst) = pprOpOp (sLit "bsr") format src (OpReg dst) +pprInstr (PDEP format src mask dst) = pprFormatOpOpReg (sLit "pdep") format src mask dst +pprInstr (PEXT format src mask dst) = pprFormatOpOpReg (sLit "pext") format src mask dst + pprInstr (PREFETCH NTA format src ) = pprFormatOp_ (sLit "prefetchnta") format src pprInstr (PREFETCH Lvl0 format src) = pprFormatOp_ (sLit "prefetcht0") format src pprInstr (PREFETCH Lvl1 format src) = pprFormatOp_ (sLit "prefetcht1") format src @@ -702,7 +796,7 @@ pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op) pprInstr (JXX cond blockid) = pprCondInstr (sLit "j") cond (ppr lab) - where lab = mkAsmTempLabel (getUnique blockid) + where lab = blockLbl blockid pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm) @@ -1259,6 +1353,16 @@ pprFormatRegRegReg name format reg1 reg2 reg3 pprReg format reg3 ] +pprFormatOpOpReg :: LitString -> Format -> Operand -> Operand -> Reg -> SDoc +pprFormatOpOpReg name format op1 op2 reg3 + = hcat [ + pprMnemonic name format, + pprOperand format op1, + comma, + pprOperand format op2, + comma, + pprReg format reg3 + ] pprFormatAddrReg :: LitString -> Format -> AddrMode -> Reg -> SDoc pprFormatAddrReg name format op dst @@ -1302,4 +1406,3 @@ pprFormatOpOpCoerce name format1 format2 op1 op2 pprCondInstr :: LitString -> Cond -> SDoc -> SDoc pprCondInstr name cond arg = hcat [ char '\t', ptext name, pprCond cond, space, arg] - diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs index 4dfe0350d4..226441b16f 100644 --- a/compiler/nativeGen/X86/RegInfo.hs +++ b/compiler/nativeGen/X86/RegInfo.hs @@ -9,6 +9,8 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" +import GhcPrelude + import Format import Reg diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs index 4cb82ea224..97c3b984e2 100644 --- a/compiler/nativeGen/X86/Regs.hs +++ b/compiler/nativeGen/X86/Regs.hs @@ -48,6 +48,8 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" +import GhcPrelude + import CodeGen.Platform import Reg import RegClass @@ -58,8 +60,10 @@ import DynFlags import Outputable import Platform +import qualified Data.Array as A + -- | regSqueeze_class reg --- Calculuate the maximum number of register colors that could be +-- Calculate the maximum number of register colors that could be -- denied to a node of this class due to having this reg -- as a neighbour. -- @@ -142,7 +146,7 @@ litToImm (CmmFloat f W32) = ImmFloat f litToImm (CmmFloat f W64) = ImmDouble f litToImm (CmmLabel l) = ImmCLbl l litToImm (CmmLabelOff l off) = ImmIndex l off -litToImm (CmmLabelDiffOff l1 l2 off) +litToImm (CmmLabelDiffOff l1 l2 off _) = ImmConstantSum (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) (ImmInt off) @@ -234,7 +238,6 @@ xmmregnos platform = [firstxmm .. lastxmm platform] floatregnos :: Platform -> [RegNo] floatregnos platform = fakeregnos ++ xmmregnos platform - -- argRegs is the set of regs which are read for an n-argument call to C. -- For archs which pass all args on the stack (x86), is empty. -- Sparc passes up to the first 6 args in regs. @@ -267,13 +270,13 @@ showReg platform n | n >= firstxmm = "%xmm" ++ show (n-firstxmm) | n >= firstfake = "%fake" ++ show (n-firstfake) | n >= 8 = "%r" ++ show n - | otherwise = regNames platform !! n + | otherwise = regNames platform A.! n -regNames :: Platform -> [String] +regNames :: Platform -> A.Array Int String regNames platform = if target32Bit platform - then ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp"] - else ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp"] + then A.listArray (0,8) ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp"] + else A.listArray (0,8) ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp"] @@ -404,7 +407,10 @@ callClobberedRegs platform | target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform) | platformOS platform == OSMinGW32 = [rax,rcx,rdx,r8,r9,r10,r11] - ++ map regSingle (floatregnos platform) + -- Only xmm0-5 are caller-saves registers on 64bit windows. + -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage ) + -- For details check the Win64 ABI. + ++ map regSingle fakeregnos ++ map xmm [0 .. 5] | otherwise -- all xmm regs are caller-saves -- caller-saves registers |