summaryrefslogtreecommitdiff
path: root/compiler/nativeGen
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--compiler/nativeGen/AsmCodeGen.hs24
-rw-r--r--compiler/nativeGen/CPrim.hs22
-rw-r--r--compiler/nativeGen/Dwarf.hs15
-rw-r--r--compiler/nativeGen/Dwarf/Constants.hs2
-rw-r--r--compiler/nativeGen/Dwarf/Types.hs16
-rw-r--r--compiler/nativeGen/Format.hs3
-rw-r--r--compiler/nativeGen/Instruction.hs12
-rw-r--r--compiler/nativeGen/NCGMonad.hs7
-rw-r--r--compiler/nativeGen/PIC.hs66
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs207
-rw-r--r--compiler/nativeGen/PPC/Cond.hs2
-rw-r--r--compiler/nativeGen/PPC/Instr.hs71
-rw-r--r--compiler/nativeGen/PPC/Ppr.hs89
-rw-r--r--compiler/nativeGen/PPC/RegInfo.hs12
-rw-r--r--compiler/nativeGen/PPC/Regs.hs6
-rw-r--r--compiler/nativeGen/PprBase.hs2
-rw-r--r--compiler/nativeGen/Reg.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/ArchBase.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/ArchX86.hs27
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/Coalesce.hs6
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/Main.hs3
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/Spill.hs13
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/SpillClean.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/SpillCost.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/Stats.hs17
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/Base.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/Main.hs28
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/StackMap.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/State.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/Stats.hs3
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Liveness.hs10
-rw-r--r--compiler/nativeGen/RegClass.hs2
-rw-r--r--compiler/nativeGen/SPARC/AddrMode.hs2
-rw-r--r--compiler/nativeGen/SPARC/Base.hs2
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs31
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Amode.hs2
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Base.hs2
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/CondCode.hs2
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Expand.hs4
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Gen32.hs2
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Gen64.hs20
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Sanity.hs2
-rw-r--r--compiler/nativeGen/SPARC/Cond.hs2
-rw-r--r--compiler/nativeGen/SPARC/Imm.hs4
-rw-r--r--compiler/nativeGen/SPARC/Instr.hs2
-rw-r--r--compiler/nativeGen/SPARC/Ppr.hs13
-rw-r--r--compiler/nativeGen/SPARC/Regs.hs6
-rw-r--r--compiler/nativeGen/SPARC/ShortcutJump.hs14
-rw-r--r--compiler/nativeGen/SPARC/Stack.hs2
-rw-r--r--compiler/nativeGen/TargetReg.hs2
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs247
-rw-r--r--compiler/nativeGen/X86/Cond.hs2
-rw-r--r--compiler/nativeGen/X86/Instr.hs161
-rw-r--r--compiler/nativeGen/X86/Ppr.hs149
-rw-r--r--compiler/nativeGen/X86/RegInfo.hs2
-rw-r--r--compiler/nativeGen/X86/Regs.hs22
63 files changed, 1073 insertions, 317 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.hs b/compiler/nativeGen/AsmCodeGen.hs
index 45d170e28d..79c3440ff6 100644
--- a/compiler/nativeGen/AsmCodeGen.hs
+++ b/compiler/nativeGen/AsmCodeGen.hs
@@ -25,6 +25,8 @@ module AsmCodeGen (
#include "nativeGen/NCG.h"
+import GhcPrelude
+
import qualified X86.CodeGen
import qualified X86.Regs
import qualified X86.Instr
@@ -363,7 +365,7 @@ finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
-- build the global register conflict graph
let graphGlobal
- = foldl Color.union Color.initGraph
+ = foldl' Color.union Color.initGraph
$ [ Color.raGraph stat
| stat@Color.RegAllocStatsStart{} <- stats]
@@ -927,16 +929,18 @@ generateJumpTables ncgImpl xs = concatMap f xs
shortcutBranches
:: DynFlags
- -> NcgImpl statics instr jumpDest
+ -> NcgImpl statics instr jumpDest
-> [NatCmmDecl statics instr]
-> [NatCmmDecl statics instr]
shortcutBranches dflags ncgImpl tops
- | optLevel dflags < 1 = tops -- only with -O or higher
- | otherwise = map (apply_mapping ncgImpl mapping) tops'
+ | gopt Opt_AsmShortcutting dflags
+ = map (apply_mapping ncgImpl mapping) tops'
+ | otherwise
+ = tops
where
(tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
- mapping = foldr plusUFM emptyUFM mappings
+ mapping = plusUFMList mappings
build_mapping :: NcgImpl statics instr jumpDest
-> GenCmmDecl d (LabelMap t) (ListGraph instr)
@@ -953,7 +957,7 @@ build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
-- shorted.
-- Don't completely eliminate loops here -- that can leave a dangling jump!
(_, shortcut_blocks, others) =
- foldl split (setEmpty :: LabelSet, [], []) blocks
+ foldl' split (setEmpty :: LabelSet, [], []) blocks
split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
| Just jd <- canShortcut ncgImpl insn,
Just dest <- getJumpDestBlockId ncgImpl jd,
@@ -970,7 +974,7 @@ build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
has_info l = mapMember l info
-- build a mapping from BlockId to JumpDest for shorting branches
- mapping = foldl add emptyUFM shortcut_blocks
+ mapping = foldl' add emptyUFM shortcut_blocks
add ufm (id,dest) = addToUFM ufm id dest
apply_mapping :: NcgImpl statics instr jumpDest
@@ -1212,15 +1216,15 @@ cmmExprNative referenceKind expr = do
-- to use the register table, so we replace these registers
-- with the corresponding labels:
CmmReg (CmmGlobal EagerBlackholeInfo)
- | arch == ArchPPC && not (gopt Opt_PIC dflags)
+ | arch == ArchPPC && not (positionIndependent dflags)
-> cmmExprNative referenceKind $
CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info")))
CmmReg (CmmGlobal GCEnter1)
- | arch == ArchPPC && not (gopt Opt_PIC dflags)
+ | arch == ArchPPC && not (positionIndependent dflags)
-> cmmExprNative referenceKind $
CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1")))
CmmReg (CmmGlobal GCFun)
- | arch == ArchPPC && not (gopt Opt_PIC dflags)
+ | arch == ArchPPC && not (positionIndependent dflags)
-> cmmExprNative referenceKind $
CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun")))
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs
index c52fe10b13..399d646000 100644
--- a/compiler/nativeGen/CPrim.hs
+++ b/compiler/nativeGen/CPrim.hs
@@ -5,12 +5,16 @@ module CPrim
, atomicRMWLabel
, cmpxchgLabel
, popCntLabel
+ , pdepLabel
+ , pextLabel
, bSwapLabel
, clzLabel
, ctzLabel
, word2FloatLabel
) where
+import GhcPrelude
+
import CmmType
import CmmMachOp
import Outputable
@@ -24,6 +28,24 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w
pprWidth W64 = "64"
pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
+pdepLabel :: Width -> String
+pdepLabel w = "hs_pdep" ++ pprWidth w
+ where
+ pprWidth W8 = "8"
+ pprWidth W16 = "16"
+ pprWidth W32 = "32"
+ pprWidth W64 = "64"
+ pprWidth w = pprPanic "pdepLabel: Unsupported word width " (ppr w)
+
+pextLabel :: Width -> String
+pextLabel w = "hs_pext" ++ pprWidth w
+ where
+ pprWidth W8 = "8"
+ pprWidth W16 = "16"
+ pprWidth W32 = "32"
+ pprWidth W64 = "64"
+ pprWidth w = pprPanic "pextLabel: Unsupported word width " (ppr w)
+
bSwapLabel :: Width -> String
bSwapLabel w = "hs_bswap" ++ pprWidth w
where
diff --git a/compiler/nativeGen/Dwarf.hs b/compiler/nativeGen/Dwarf.hs
index afeac030fd..0e645a2a56 100644
--- a/compiler/nativeGen/Dwarf.hs
+++ b/compiler/nativeGen/Dwarf.hs
@@ -2,6 +2,8 @@ module Dwarf (
dwarfGen
) where
+import GhcPrelude
+
import CLabel
import CmmExpr ( GlobalReg(..) )
import Config ( cProjectName, cProjectVersion )
@@ -147,7 +149,7 @@ debugSplitProcs b = concat $ H.mapElems $ mergeMaps $ map (split Nothing) b
{-
Note [Splitting DebugBlocks]
-DWARF requires that we break up the the nested DebugBlocks produced from
+DWARF requires that we break up the nested DebugBlocks produced from
the C-- AST. For instance, we begin with tick trees containing nested procs.
For example,
@@ -180,10 +182,17 @@ procToDwarf df prc
_otherwise -> showSDocDump df $ ppr $ dblLabel prc
, dwLabel = dblCLabel prc
, dwParent = fmap mkAsmTempDieLabel
- $ mfilter (/= dblCLabel prc)
+ $ mfilter goodParent
$ fmap dblCLabel (dblParent prc)
- -- Omit parent if it would be self-referential
}
+ where
+ goodParent a | a == dblCLabel prc = False
+ -- Omit parent if it would be self-referential
+ goodParent a | not (externallyVisibleCLabel a)
+ , debugLevel df < 2 = False
+ -- We strip block information when running -g0 or -g1, don't
+ -- refer to blocks in that case. Fixes #14894.
+ goodParent _ = True
-- | Generate DWARF info for a block
blockToDwarf :: DynFlags -> DebugBlock -> DwarfInfo
diff --git a/compiler/nativeGen/Dwarf/Constants.hs b/compiler/nativeGen/Dwarf/Constants.hs
index a8034ef295..db5395af35 100644
--- a/compiler/nativeGen/Dwarf/Constants.hs
+++ b/compiler/nativeGen/Dwarf/Constants.hs
@@ -3,6 +3,8 @@
module Dwarf.Constants where
+import GhcPrelude
+
import AsmUtils
import FastString
import Platform
diff --git a/compiler/nativeGen/Dwarf/Types.hs b/compiler/nativeGen/Dwarf/Types.hs
index 3c4501f613..25629448dd 100644
--- a/compiler/nativeGen/Dwarf/Types.hs
+++ b/compiler/nativeGen/Dwarf/Types.hs
@@ -22,6 +22,8 @@ module Dwarf.Types
)
where
+import GhcPrelude
+
import Debug
import CLabel
import CmmExpr ( GlobalReg(..) )
@@ -344,7 +346,7 @@ pprFrameProc frameLbl initUw (DwarfFrameProc procLbl hasInfo blocks)
procEnd = mkAsmTempEndLabel procLbl
ifInfo str = if hasInfo then text str else empty
-- see [Note: Info Offset]
- in vcat [ ifPprDebug $ text "# Unwinding for" <+> ppr procLbl <> colon
+ in vcat [ whenPprDebug $ text "# Unwinding for" <+> ppr procLbl <> colon
, pprData4' (ppr fdeEndLabel <> char '-' <> ppr fdeLabel)
, ppr fdeLabel <> colon
, pprData4' (ppr frameLbl <> char '-' <>
@@ -413,6 +415,12 @@ pprFrameBlock (DwarfFrameBlock hasInfo uws0) =
-- Note that this will not prevent GDB from failing to look-up the
-- correct function name for the frame, as that uses the symbol table,
-- which we can not manipulate as easily.
+--
+-- There's a GDB patch to address this at [1]. At the moment of writing
+-- it's not merged, so I recommend building GDB with the patch if you
+-- care about unwinding. The hack above doesn't cover every case.
+--
+-- [1] https://sourceware.org/ml/gdb-patches/2018-02/msg00055.html
-- | Get DWARF register ID for a given GlobalReg
dwarfGlobalRegNo :: Platform -> GlobalReg -> Word8
@@ -490,9 +498,11 @@ pprUnwindExpr spIsCFA expr
pprE (UwPlus u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_plus
pprE (UwMinus u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_minus
pprE (UwTimes u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_mul
- in text "\t.uleb128 1f-.-1" $$ -- DW_FORM_block length
+ in text "\t.uleb128 2f-1f" $$ -- DW_FORM_block length
+ -- computed as the difference of the following local labels 2: and 1:
+ text "1:" $$
pprE expr $$
- text "1:"
+ text "2:"
-- | Generate code for re-setting the unwind information for a
-- register to @undefined@
diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs
index 00811f1375..82ecbecc14 100644
--- a/compiler/nativeGen/Format.hs
+++ b/compiler/nativeGen/Format.hs
@@ -20,6 +20,8 @@ module Format (
where
+import GhcPrelude
+
import Cmm
import Outputable
@@ -68,6 +70,7 @@ floatFormat width
= case width of
W32 -> FF32
W64 -> FF64
+ W80 -> FF80
other -> pprPanic "Format.floatFormat" (ppr other)
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
index 515d4f3d85..0bd99fbee8 100644
--- a/compiler/nativeGen/Instruction.hs
+++ b/compiler/nativeGen/Instruction.hs
@@ -14,6 +14,8 @@ module Instruction (
where
+import GhcPrelude
+
import Reg
import BlockId
@@ -189,14 +191,12 @@ class Instruction instr where
-- Subtract an amount from the C stack pointer
mkStackAllocInstr
- :: Platform -- TODO: remove (needed by x86/x86_64
- -- because they share an Instr type)
+ :: Platform
-> Int
- -> instr
+ -> [instr]
-- Add an amount to the C stack pointer
mkStackDeallocInstr
- :: Platform -- TODO: remove (needed by x86/x86_64
- -- because they share an Instr type)
+ :: Platform
-> Int
- -> instr
+ -> [instr]
diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs
index 6af0df5b01..b9532e17b5 100644
--- a/compiler/nativeGen/NCGMonad.hs
+++ b/compiler/nativeGen/NCGMonad.hs
@@ -37,6 +37,8 @@ where
#include "HsVersions.h"
+import GhcPrelude
+
import Reg
import Format
import TargetReg
@@ -44,7 +46,7 @@ import TargetReg
import BlockId
import Hoopl.Collections
import Hoopl.Label
-import CLabel ( CLabel, mkAsmTempLabel )
+import CLabel ( CLabel )
import Debug
import FastString ( FastString )
import UniqFM
@@ -158,8 +160,7 @@ getBlockIdNat
getNewLabelNat :: NatM CLabel
getNewLabelNat
- = do u <- getUniqueNat
- return (mkAsmTempLabel u)
+ = blockLbl <$> getBlockIdNat
getNewRegNat :: Format -> NatM Reg
diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs
index bef0a21235..2f300c4614 100644
--- a/compiler/nativeGen/PIC.hs
+++ b/compiler/nativeGen/PIC.hs
@@ -47,6 +47,8 @@ module PIC (
where
+import GhcPrelude
+
import qualified PPC.Instr as PPC
import qualified PPC.Regs as PPC
@@ -162,7 +164,7 @@ cmmMakePicReference dflags lbl
| OSAIX <- platformOS $ targetPlatform dflags
= CmmMachOp (MO_Add W32)
[ CmmReg (CmmGlobal PicBaseReg)
- , CmmLit $ picRelative
+ , CmmLit $ picRelative dflags
(platformArch $ targetPlatform dflags)
(platformOS $ targetPlatform dflags)
lbl ]
@@ -171,15 +173,16 @@ cmmMakePicReference dflags lbl
| ArchPPC_64 _ <- platformArch $ targetPlatform dflags
= CmmMachOp (MO_Add W32) -- code model medium
[ CmmReg (CmmGlobal PicBaseReg)
- , CmmLit $ picRelative
+ , CmmLit $ picRelative dflags
(platformArch $ targetPlatform dflags)
(platformOS $ targetPlatform dflags)
lbl ]
- | (gopt Opt_PIC dflags || WayDyn `elem` ways dflags) && absoluteLabel lbl
+ | (positionIndependent dflags || gopt Opt_ExternalDynamicRefs dflags)
+ && absoluteLabel lbl
= CmmMachOp (MO_Add (wordWidth dflags))
[ CmmReg (CmmGlobal PicBaseReg)
- , CmmLit $ picRelative
+ , CmmLit $ picRelative dflags
(platformArch $ targetPlatform dflags)
(platformOS $ targetPlatform dflags)
lbl ]
@@ -236,7 +239,7 @@ howToAccessLabel
howToAccessLabel dflags _ OSMinGW32 this_mod _ lbl
-- Assume all symbols will be in the same PE, so just access them directly.
- | WayDyn `notElem` ways dflags
+ | not (gopt Opt_ExternalDynamicRefs dflags)
= AccessDirectly
-- If the target symbol is in another PE we need to access it via the
@@ -272,7 +275,7 @@ howToAccessLabel dflags arch OSDarwin this_mod DataReference lbl
-- we'd need to pass the current Module all the way in to
-- this function.
| arch /= ArchX86_64
- , gopt Opt_PIC dflags && externallyVisibleCLabel lbl
+ , positionIndependent dflags && externallyVisibleCLabel lbl
= AccessViaSymbolPtr
| otherwise
@@ -313,8 +316,8 @@ howToAccessLabel _dflags _arch OSAIX _this_mod kind _lbl
--
-- ELF tries to pretend to the main application code that dynamic linking does
-- not exist. While this may sound convenient, it tends to mess things up in
--- very bad ways, so we have to be careful when we generate code for the main
--- program (-dynamic but no -fPIC).
+-- very bad ways, so we have to be careful when we generate code for a non-PIE
+-- main program (-dynamic but no -fPIC).
--
-- Indirect access is required for references to imported symbols
-- from position independent code. It is also required from the main program
@@ -337,7 +340,8 @@ howToAccessLabel dflags _ os _ _ _
-- if we don't dynamically link to Haskell code,
-- it actually manages to do so without messing things up.
| osElfTarget os
- , not (gopt Opt_PIC dflags) && WayDyn `notElem` ways dflags
+ , not (positionIndependent dflags) &&
+ not (gopt Opt_ExternalDynamicRefs dflags)
= AccessDirectly
howToAccessLabel dflags arch os this_mod DataReference lbl
@@ -351,7 +355,7 @@ howToAccessLabel dflags arch os this_mod DataReference lbl
-- via a symbol pointer (see below for an explanation why
-- PowerPC32 Linux is especially broken).
| arch == ArchPPC
- , gopt Opt_PIC dflags
+ , positionIndependent dflags
-> AccessViaSymbolPtr
| otherwise
@@ -372,12 +376,13 @@ howToAccessLabel dflags arch os this_mod DataReference lbl
howToAccessLabel dflags arch os this_mod CallReference lbl
| osElfTarget os
- , labelDynamic dflags this_mod lbl && not (gopt Opt_PIC dflags)
+ , labelDynamic dflags this_mod lbl && not (positionIndependent dflags)
= AccessDirectly
| osElfTarget os
, arch /= ArchX86
- , labelDynamic dflags this_mod lbl && gopt Opt_PIC dflags
+ , labelDynamic dflags this_mod lbl
+ , positionIndependent dflags
= AccessViaStub
howToAccessLabel dflags _ os this_mod _ lbl
@@ -388,7 +393,7 @@ howToAccessLabel dflags _ os this_mod _ lbl
-- all other platforms
howToAccessLabel dflags _ _ _ _ _
- | not (gopt Opt_PIC dflags)
+ | not (positionIndependent dflags)
= AccessDirectly
| otherwise
@@ -397,10 +402,10 @@ howToAccessLabel dflags _ _ _ _ _
-- -------------------------------------------------------------------
--- | Says what we we have to add to our 'PIC base register' in order to
+-- | Says what we have to add to our 'PIC base register' in order to
-- get the address of a label.
-picRelative :: Arch -> OS -> CLabel -> CmmLit
+picRelative :: DynFlags -> Arch -> OS -> CLabel -> CmmLit
-- Darwin, but not x86_64:
-- The PIC base register points to the PIC base label at the beginning
@@ -409,15 +414,15 @@ picRelative :: Arch -> OS -> CLabel -> CmmLit
-- We have already made sure that all labels that are not from the current
-- module are accessed indirectly ('as' can't calculate differences between
-- undefined labels).
-picRelative arch OSDarwin lbl
+picRelative dflags arch OSDarwin lbl
| arch /= ArchX86_64
- = CmmLabelDiffOff lbl mkPicBaseLabel 0
+ = CmmLabelDiffOff lbl mkPicBaseLabel 0 (wordWidth dflags)
-- On AIX we use an indirect local TOC anchored by 'gotLabel'.
-- This way we use up only one global TOC entry per compilation-unit
-- (this is quite similiar to GCC's @-mminimal-toc@ compilation mode)
-picRelative _ OSAIX lbl
- = CmmLabelDiffOff lbl gotLabel 0
+picRelative dflags _ OSAIX lbl
+ = CmmLabelDiffOff lbl gotLabel 0 (wordWidth dflags)
-- PowerPC Linux:
-- The PIC base register points to our fake GOT. Use a label difference
@@ -425,9 +430,9 @@ picRelative _ OSAIX lbl
-- We have made sure that *everything* is accessed indirectly, so this
-- is only used for offsets from the GOT to symbol pointers inside the
-- GOT.
-picRelative ArchPPC os lbl
+picRelative dflags ArchPPC os lbl
| osElfTarget os
- = CmmLabelDiffOff lbl gotLabel 0
+ = CmmLabelDiffOff lbl gotLabel 0 (wordWidth dflags)
-- Most Linux versions:
@@ -437,7 +442,7 @@ picRelative ArchPPC os lbl
-- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers,
-- and a GotSymbolOffset label for other things.
-- For reasons of tradition, the symbol offset label is written as a plain label.
-picRelative arch os lbl
+picRelative _ arch os lbl
| osElfTarget os || (os == OSDarwin && arch == ArchX86_64)
= let result
| Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
@@ -448,7 +453,7 @@ picRelative arch os lbl
in result
-picRelative _ _ _
+picRelative _ _ _ _
= panic "PositionIndependentCode.picRelative undefined for this platform"
@@ -467,7 +472,7 @@ needImportedSymbols dflags arch os
-- PowerPC Linux: -fPIC or -dynamic
| osElfTarget os
, arch == ArchPPC
- = gopt Opt_PIC dflags || WayDyn `elem` ways dflags
+ = positionIndependent dflags || gopt Opt_ExternalDynamicRefs dflags
-- PowerPC 64 Linux: always
| osElfTarget os
@@ -477,7 +482,8 @@ needImportedSymbols dflags arch os
-- i386 (and others?): -dynamic but not -fPIC
| osElfTarget os
, arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
- = WayDyn `elem` ways dflags && not (gopt Opt_PIC dflags)
+ = gopt Opt_ExternalDynamicRefs dflags &&
+ not (positionIndependent dflags)
| otherwise
= False
@@ -499,7 +505,7 @@ gotLabel
-- However, for PIC on x86, we need a small helper function.
pprGotDeclaration :: DynFlags -> Arch -> OS -> SDoc
pprGotDeclaration dflags ArchX86 OSDarwin
- | gopt Opt_PIC dflags
+ | positionIndependent dflags
= vcat [
text ".section __TEXT,__textcoal_nt,coalesced,no_toc",
text ".weak_definition ___i686.get_pc_thunk.ax",
@@ -540,7 +546,7 @@ pprGotDeclaration _ (ArchPPC_64 _) _
pprGotDeclaration dflags arch os
| osElfTarget os
, arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
- , not (gopt Opt_PIC dflags)
+ , not (positionIndependent dflags)
= empty
| osElfTarget os
@@ -565,7 +571,7 @@ pprGotDeclaration _ _ _
pprImportedSymbol :: DynFlags -> Platform -> CLabel -> SDoc
pprImportedSymbol dflags platform@(Platform { platformArch = ArchPPC, platformOS = OSDarwin }) importedLbl
| Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
- = case gopt Opt_PIC dflags of
+ = case positionIndependent dflags of
False ->
vcat [
text ".symbol_stub",
@@ -619,7 +625,7 @@ pprImportedSymbol dflags platform@(Platform { platformArch = ArchPPC, platformOS
pprImportedSymbol dflags platform@(Platform { platformArch = ArchX86, platformOS = OSDarwin }) importedLbl
| Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
- = case gopt Opt_PIC dflags of
+ = case positionIndependent dflags of
False ->
vcat [
text ".symbol_stub",
@@ -652,7 +658,7 @@ pprImportedSymbol dflags platform@(Platform { platformArch = ArchX86, platformOS
text "\tjmp dyld_stub_binding_helper"
]
$+$ vcat [ text ".section __DATA, __la_sym_ptr"
- <> (if gopt Opt_PIC dflags then int 2 else int 3)
+ <> (if positionIndependent dflags then int 2 else int 3)
<> text ",lazy_symbol_pointers",
text "L" <> pprCLabel platform lbl <> ptext (sLit "$lazy_ptr:"),
text "\t.indirect_symbol" <+> pprCLabel platform lbl,
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 1e88a1d025..f246ec36f1 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -25,6 +25,8 @@ where
#include "../includes/MachDeps.h"
-- NCG stuff:
+import GhcPrelude
+
import CodeGen.Platform
import PPC.Instr
import PPC.Cond
@@ -52,7 +54,6 @@ import Hoopl.Graph
-- The rest:
import OrdList
import Outputable
-import Unique
import DynFlags
import Control.Monad ( mapAndUnzipM, when )
@@ -90,13 +91,23 @@ cmmTopCodeGen (CmmProc info lab live graph) = do
case picBaseMb of
Just picBase -> initializePicBase_ppc arch os picBase tops
Nothing -> return tops
- ArchPPC_64 ELF_V1 -> return tops
+ ArchPPC_64 ELF_V1 -> fixup_entry tops
-- generating function descriptor is handled in
-- pretty printer
- ArchPPC_64 ELF_V2 -> return tops
+ ArchPPC_64 ELF_V2 -> fixup_entry tops
-- generating function prologue is handled in
-- pretty printer
_ -> panic "PPC.cmmTopCodeGen: unknown arch"
+ where
+ fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
+ = do
+ let BasicBlock bID insns = entry
+ bID' <- if lab == (blockLbl bID)
+ then newBlockId
+ else return bID
+ let b' = BasicBlock bID' insns
+ return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
+ fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
cmmTopCodeGen (CmmData sec dat) = do
return [CmmData sec dat] -- no translation, we just use CmmStatic
@@ -161,8 +172,8 @@ stmtToInstrs stmt = do
-> genCCall target result_regs args
CmmBranch id -> genBranch id
- CmmCondBranch arg true false _ -> do
- b1 <- genCondJump true arg
+ CmmCondBranch arg true false prediction -> do
+ b1 <- genCondJump true arg prediction
b2 <- genBranch false
return (b1 `appOL` b2)
CmmSwitch arg ids -> do dflags <- getDynFlags
@@ -214,7 +225,7 @@ getRegisterReg platform (CmmGlobal mid)
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = mkAsmTempLabel (getUnique blockid)
+ where blockLabel = blockLbl blockid
@@ -371,6 +382,14 @@ iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
mov_lo = MR rlo expr_reg
return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
rlo
+
+iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
+ (expr_reg,expr_code) <- getSomeReg expr
+ (rlo, rhi) <- getNewRegPairNat II32
+ let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
+ mov_lo = MR rlo expr_reg
+ return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
iselExpr64 expr
= pprPanic "iselExpr64(powerpc)" (pprExpr expr)
@@ -719,7 +738,7 @@ data Amode
= Amode AddrMode InstrBlock
{-
-Now, given a tree (the argument to an CmmLoad) that references memory,
+Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.
A Rule of the Game (tm) for Amodes: use of the addr bit must
@@ -1070,11 +1089,12 @@ comparison to do.
genCondJump
:: BlockId -- the branch target
-> CmmExpr -- the condition on which to branch
+ -> Maybe Bool
-> NatM InstrBlock
-genCondJump id bool = do
+genCondJump id bool prediction = do
CondCode _ cond code <- getCondCode bool
- return (code `snocOL` BCC cond id)
+ return (code `snocOL` BCC cond id prediction)
@@ -1098,6 +1118,90 @@ genCCall (PrimTarget MO_Touch) _ _
genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
= return $ nilOL
+genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ fmt = intFormat width
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ (instr, n_code) <- case amop of
+ AMO_Add -> getSomeRegOrImm ADD True reg_dst
+ AMO_Sub -> case n of
+ CmmLit (CmmInt i _)
+ | Just imm <- makeImmediate width True (-i)
+ -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
+ _
+ -> do
+ (n_reg, n_code) <- getSomeReg n
+ return (SUBF reg_dst n_reg reg_dst, n_code)
+ AMO_And -> getSomeRegOrImm AND False reg_dst
+ AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
+ return (NAND reg_dst reg_dst n_reg, n_code)
+ AMO_Or -> getSomeRegOrImm OR False reg_dst
+ AMO_Xor -> getSomeRegOrImm XOR False reg_dst
+ Amode addr_reg addr_code <- getAmodeIndex addr
+ lbl_retry <- getBlockIdNat
+ return $ n_code `appOL` addr_code
+ `appOL` toOL [ HWSYNC
+ , BCC ALWAYS lbl_retry Nothing
+
+ , NEWBLOCK lbl_retry
+ , LDR fmt reg_dst addr_reg
+ , instr
+ , STC fmt reg_dst addr_reg
+ , BCC NE lbl_retry (Just False)
+ , ISYNC
+ ]
+ where
+ getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+ getAmodeIndex other
+ = do
+ (reg, code) <- getSomeReg other
+ return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
+ getSomeRegOrImm op sign dst
+ = case n of
+ CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
+ -> return (op dst dst (RIImm imm), nilOL)
+ _
+ -> do
+ (n_reg, n_code) <- getSomeReg n
+ return (op dst dst (RIReg n_reg), n_code)
+
+genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ fmt = intFormat width
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ form = if widthInBits width == 64 then DS else D
+ Amode addr_reg addr_code <- getAmode form addr
+ lbl_end <- getBlockIdNat
+ return $ addr_code `appOL` toOL [ HWSYNC
+ , LD fmt reg_dst addr_reg
+ , CMP fmt reg_dst (RIReg reg_dst)
+ , BCC NE lbl_end (Just False)
+ , BCC ALWAYS lbl_end Nothing
+ -- See Note [Seemingly useless cmp and bne]
+ , NEWBLOCK lbl_end
+ , ISYNC
+ ]
+
+-- Note [Seemingly useless cmp and bne]
+-- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
+-- the second paragraph says that isync may complete before storage accesses
+-- "associated" with a preceding instruction have been performed. The cmp
+-- operation and the following bne introduce a data and control dependency
+-- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
+-- Fetch).
+-- This is also what gcc does.
+
+
+genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
+ code <- assignMem_IntCode (intFormat width) addr val
+ return $ unitOL(HWSYNC) `appOL` code
+
genCCall (PrimTarget (MO_Clz width)) [dst] [src]
= do dflags <- getDynFlags
let platform = targetPlatform dflags
@@ -1110,17 +1214,17 @@ genCCall (PrimTarget (MO_Clz width)) [dst] [src]
lbl3 <- getBlockIdNat
let vr_hi = getHiVRegFromLo vr_lo
cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
- , BCC NE lbl2
- , BCC ALWAYS lbl1
+ , BCC NE lbl2 Nothing
+ , BCC ALWAYS lbl1 Nothing
, NEWBLOCK lbl1
, CNTLZ II32 reg_dst vr_lo
, ADD reg_dst reg_dst (RIImm (ImmInt 32))
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl2
, CNTLZ II32 reg_dst vr_hi
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl3
]
@@ -1167,8 +1271,8 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
cnttzlo <- cnttz format reg_dst vr_lo
let vr_hi = getHiVRegFromLo vr_lo
cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
- , BCC NE lbl2
- , BCC ALWAYS lbl1
+ , BCC NE lbl2 Nothing
+ , BCC ALWAYS lbl1 Nothing
, NEWBLOCK lbl1
, ADD x' vr_hi (RIImm (ImmInt (-1)))
@@ -1176,12 +1280,12 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
, CNTLZ format r' x''
-- 32 + (32 - clz(x''))
, SUBFC reg_dst r' (RIImm (ImmInt 64))
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl2
]
`appOL` cnttzlo `appOL`
- toOL [ BCC ALWAYS lbl3
+ toOL [ BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl3
]
@@ -1229,6 +1333,7 @@ genCCall target dest_regs argsAndHints
PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
argsAndHints
PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
+ PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
dest_regs argsAndHints
@@ -1315,21 +1420,21 @@ genCCall target dest_regs argsAndHints
-- rhat = un32 - q1*vn1
, MULL fmt tmp q1 (RIReg vn1)
, SUBF rhat tmp un32
- , BCC ALWAYS again1
+ , BCC ALWAYS again1 Nothing
, NEWBLOCK again1
-- if (q1 >= b || q1*vn0 > b*rhat + un1)
, CMPL fmt q1 (RIReg b)
- , BCC GEU then1
- , BCC ALWAYS no1
+ , BCC GEU then1 Nothing
+ , BCC ALWAYS no1 Nothing
, NEWBLOCK no1
, MULL fmt tmp q1 (RIReg vn0)
, SL fmt tmp1 rhat (RIImm (ImmInt half))
, ADD tmp1 tmp1 (RIReg un1)
, CMPL fmt tmp (RIReg tmp1)
- , BCC LEU endif1
- , BCC ALWAYS then1
+ , BCC LEU endif1 Nothing
+ , BCC ALWAYS then1 Nothing
, NEWBLOCK then1
-- q1 = q1 - 1
@@ -1338,8 +1443,8 @@ genCCall target dest_regs argsAndHints
, ADD rhat rhat (RIReg vn1)
-- if (rhat < b) goto again1
, CMPL fmt rhat (RIReg b)
- , BCC LTT again1
- , BCC ALWAYS endif1
+ , BCC LTT again1 Nothing
+ , BCC ALWAYS endif1 Nothing
, NEWBLOCK endif1
-- un21 = un32*b + un1 - q1*v
@@ -1353,21 +1458,21 @@ genCCall target dest_regs argsAndHints
-- rhat = un21- q0*vn1
, MULL fmt tmp q0 (RIReg vn1)
, SUBF rhat tmp un21
- , BCC ALWAYS again2
+ , BCC ALWAYS again2 Nothing
, NEWBLOCK again2
-- if (q0>b || q0*vn0 > b*rhat + un0)
, CMPL fmt q0 (RIReg b)
- , BCC GEU then2
- , BCC ALWAYS no2
+ , BCC GEU then2 Nothing
+ , BCC ALWAYS no2 Nothing
, NEWBLOCK no2
, MULL fmt tmp q0 (RIReg vn0)
, SL fmt tmp1 rhat (RIImm (ImmInt half))
, ADD tmp1 tmp1 (RIReg un0)
, CMPL fmt tmp (RIReg tmp1)
- , BCC LEU endif2
- , BCC ALWAYS then2
+ , BCC LEU endif2 Nothing
+ , BCC ALWAYS then2 Nothing
, NEWBLOCK then2
-- q0 = q0 - 1
@@ -1376,8 +1481,8 @@ genCCall target dest_regs argsAndHints
, ADD rhat rhat (RIReg vn1)
-- if (rhat<b) goto again2
, CMPL fmt rhat (RIReg b)
- , BCC LTT again2
- , BCC ALWAYS endif2
+ , BCC LTT again2 Nothing
+ , BCC ALWAYS endif2 Nothing
, NEWBLOCK endif2
-- compute remainder
@@ -1419,6 +1524,11 @@ genCCall target dest_regs argsAndHints
add2Op _ _ _
= panic "genCCall: Wrong number of arguments/results for add2"
+ addcOp platform [res_r, res_c] [arg_x, arg_y]
+ = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
+ addcOp _ _ _
+ = panic "genCCall: Wrong number of arguments/results for addc"
+
-- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
-- which is 0 for borrow and 1 otherwise. We need 1 and 0
-- so xor with 1.
@@ -1598,7 +1708,7 @@ genCCall' dflags gcp target dest_regs args
uses_pic_base_implicitly = do
-- See Note [implicit register in PPC PIC code]
-- on why we claim to use PIC register here
- when (gopt Opt_PIC dflags && target32Bit platform) $ do
+ when (positionIndependent dflags && target32Bit platform) $ do
_ <- getPicBaseNat $ archWordFormat True
return ()
@@ -1881,6 +1991,10 @@ genCCall' dflags gcp target dest_regs args
MO_F32_Tanh -> (fsLit "tanh", True)
MO_F32_Pwr -> (fsLit "pow", True)
+ MO_F32_Asinh -> (fsLit "asinh", True)
+ MO_F32_Acosh -> (fsLit "acosh", True)
+ MO_F32_Atanh -> (fsLit "atanh", True)
+
MO_F64_Exp -> (fsLit "exp", False)
MO_F64_Log -> (fsLit "log", False)
MO_F64_Sqrt -> (fsLit "sqrt", False)
@@ -1899,32 +2013,40 @@ genCCall' dflags gcp target dest_regs args
MO_F64_Tanh -> (fsLit "tanh", False)
MO_F64_Pwr -> (fsLit "pow", False)
+ MO_F64_Asinh -> (fsLit "asinh", False)
+ MO_F64_Acosh -> (fsLit "acosh", False)
+ MO_F64_Atanh -> (fsLit "atanh", False)
+
MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
MO_Memcpy _ -> (fsLit "memcpy", False)
MO_Memset _ -> (fsLit "memset", False)
MO_Memmove _ -> (fsLit "memmove", False)
+ MO_Memcmp _ -> (fsLit "memcmp", False)
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
- MO_Clz w -> (fsLit $ clzLabel w, False)
- MO_Ctz w -> (fsLit $ ctzLabel w, False)
- MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
+ MO_Pdep w -> (fsLit $ pdepLabel w, False)
+ MO_Pext w -> (fsLit $ pextLabel w, False)
+ MO_Clz _ -> unsupported
+ MO_Ctz _ -> unsupported
+ MO_AtomicRMW {} -> unsupported
MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
- MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
- MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
+ MO_AtomicRead _ -> unsupported
+ MO_AtomicWrite _ -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
MO_Add2 {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- (MO_Prefetch_Data _ ) -> unsupported
+ MO_Prefetch_Data _ -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported")
@@ -1950,7 +2072,7 @@ genSwitch dflags expr targets
]
return code
- | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags)
+ | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags)
= do
(reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
@@ -1988,15 +2110,16 @@ generateJumpTableForInstr :: DynFlags -> Instr
-> Maybe (NatCmmDecl CmmStatics Instr)
generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
let jumpTable
- | (gopt Opt_PIC dflags)
+ | (positionIndependent dflags)
|| (not $ target32Bit $ targetPlatform dflags)
= map jumpTableEntryRel ids
| otherwise = map (jumpTableEntry dflags) ids
where jumpTableEntryRel Nothing
= CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntryRel (Just blockid)
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
- where blockLabel = mkAsmTempLabel (getUnique blockid)
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
+ (wordWidth dflags))
+ where blockLabel = blockLbl blockid
in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
generateJumpTableForInstr _ _ = Nothing
diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs
index 0e4b1fd701..bd8bdee81a 100644
--- a/compiler/nativeGen/PPC/Cond.hs
+++ b/compiler/nativeGen/PPC/Cond.hs
@@ -8,6 +8,8 @@ module PPC.Cond (
where
+import GhcPrelude
+
import Panic
data Cond
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index eb179c5a99..8eb5e8fa8d 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -23,6 +23,8 @@ module PPC.Instr (
where
+import GhcPrelude
+
import PPC.Regs
import PPC.Cond
import Instruction
@@ -75,19 +77,19 @@ instance Instruction Instr where
mkStackDeallocInstr = ppc_mkStackDeallocInstr
-ppc_mkStackAllocInstr :: Platform -> Int -> Instr
+ppc_mkStackAllocInstr :: Platform -> Int -> [Instr]
ppc_mkStackAllocInstr platform amount
= ppc_mkStackAllocInstr' platform (-amount)
-ppc_mkStackDeallocInstr :: Platform -> Int -> Instr
+ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr]
ppc_mkStackDeallocInstr platform amount
= ppc_mkStackAllocInstr' platform amount
-ppc_mkStackAllocInstr' :: Platform -> Int -> Instr
+ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr]
ppc_mkStackAllocInstr' platform amount
= case platformArch platform of
- ArchPPC -> UPDATE_SP II32 (ImmInt amount)
- ArchPPC_64 _ -> UPDATE_SP II64 (ImmInt amount)
+ ArchPPC -> [UPDATE_SP II32 (ImmInt amount)]
+ ArchPPC_64 _ -> [UPDATE_SP II64 (ImmInt amount)]
_ -> panic $ "ppc_mkStackAllocInstr' "
++ show (platformArch platform)
@@ -124,7 +126,7 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
insert_stack_insns (BasicBlock id insns)
| Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id [alloc, BCC ALWAYS new_blockid]
+ = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing]
, BasicBlock new_blockid block'
]
| otherwise
@@ -137,11 +139,11 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
-- "labeled-goto" we use JMP, and for "computed-goto" we
-- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'.
= case insn of
- JMP _ -> dealloc : insn : r
- BCTR [] Nothing -> dealloc : insn : r
+ JMP _ -> dealloc ++ (insn : r)
+ BCTR [] Nothing -> dealloc ++ (insn : r)
BCTR ids label -> BCTR (map (fmap retarget) ids) label : r
- BCCFAR cond b -> BCCFAR cond (retarget b) : r
- BCC cond b -> BCC cond (retarget b) : r
+ BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
+ BCC cond b p -> BCC cond (retarget b) p : r
_ -> insn : r
-- BL and BCTRL are call-like instructions rather than
-- jumps, and are used only for C calls.
@@ -190,10 +192,12 @@ data Instr
-- Loads and stores.
| LD Format Reg AddrMode -- Load format, dst, src
| LDFAR Format Reg AddrMode -- Load format, dst, src 32 bit offset
+ | LDR Format Reg AddrMode -- Load and reserve format, dst, src
| LA Format Reg AddrMode -- Load arithmetic format, dst, src
| ST Format Reg AddrMode -- Store format, src, dst
| STFAR Format Reg AddrMode -- Store format, src, dst 32 bit offset
| STU Format Reg AddrMode -- Store with Update format, src, dst
+ | STC Format Reg AddrMode -- Store conditional format, src, dst
| LIS Reg Imm -- Load Immediate Shifted dst, src
| LI Reg Imm -- Load Immediate dst, src
| MR Reg Reg -- Move Register dst, src -- also for fmr
@@ -201,8 +205,12 @@ data Instr
| CMP Format Reg RI -- format, src1, src2
| CMPL Format Reg RI -- format, src1, src2
- | BCC Cond BlockId
- | BCCFAR Cond BlockId
+ | BCC Cond BlockId (Maybe Bool) -- cond, block, hint
+ | BCCFAR Cond BlockId (Maybe Bool) -- cond, block, hint
+ -- hint:
+ -- Just True: branch likely taken
+ -- Just False: branch likely not taken
+ -- Nothing: no hint
| JMP CLabel -- same as branch,
-- but with CLabel instead of block ID
| MTCTR Reg
@@ -232,6 +240,7 @@ data Instr
| DIV Format Bool Reg Reg Reg
| AND Reg Reg RI -- dst, src1, src2
| ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2
+ | NAND Reg Reg Reg -- dst, src1, src2
| OR Reg Reg RI -- dst, src1, src2
| ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2
| XOR Reg Reg RI -- dst, src1, src2
@@ -272,6 +281,8 @@ data Instr
| MFLR Reg -- move from link register
| FETCHPC Reg -- pseudo-instruction:
-- bcl to next insn, mflr reg
+ | HWSYNC -- heavy weight sync
+ | ISYNC -- instruction synchronize
| LWSYNC -- memory barrier
| NOP -- no operation, PowerPC 64 bit
-- needs this as place holder to
@@ -290,17 +301,19 @@ ppc_regUsageOfInstr platform instr
= case instr of
LD _ reg addr -> usage (regAddr addr, [reg])
LDFAR _ reg addr -> usage (regAddr addr, [reg])
+ LDR _ reg addr -> usage (regAddr addr, [reg])
LA _ reg addr -> usage (regAddr addr, [reg])
ST _ reg addr -> usage (reg : regAddr addr, [])
STFAR _ reg addr -> usage (reg : regAddr addr, [])
STU _ reg addr -> usage (reg : regAddr addr, [])
+ STC _ reg addr -> usage (reg : regAddr addr, [])
LIS reg _ -> usage ([], [reg])
LI reg _ -> usage ([], [reg])
MR reg1 reg2 -> usage ([reg2], [reg1])
CMP _ reg ri -> usage (reg : regRI ri,[])
CMPL _ reg ri -> usage (reg : regRI ri,[])
- BCC _ _ -> noUsage
- BCCFAR _ _ -> noUsage
+ BCC _ _ _ -> noUsage
+ BCCFAR _ _ _ -> noUsage
MTCTR reg -> usage ([reg],[])
BCTR _ _ -> noUsage
BL _ params -> usage (params, callClobberedRegs platform)
@@ -325,6 +338,7 @@ ppc_regUsageOfInstr platform instr
AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ NAND reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
ORIS reg1 reg2 _ -> usage ([reg2], [reg1])
XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
@@ -380,17 +394,19 @@ ppc_patchRegsOfInstr instr env
= case instr of
LD fmt reg addr -> LD fmt (env reg) (fixAddr addr)
LDFAR fmt reg addr -> LDFAR fmt (env reg) (fixAddr addr)
+ LDR fmt reg addr -> LDR fmt (env reg) (fixAddr addr)
LA fmt reg addr -> LA fmt (env reg) (fixAddr addr)
ST fmt reg addr -> ST fmt (env reg) (fixAddr addr)
STFAR fmt reg addr -> STFAR fmt (env reg) (fixAddr addr)
STU fmt reg addr -> STU fmt (env reg) (fixAddr addr)
+ STC fmt reg addr -> STC fmt (env reg) (fixAddr addr)
LIS reg imm -> LIS (env reg) imm
LI reg imm -> LI (env reg) imm
MR reg1 reg2 -> MR (env reg1) (env reg2)
CMP fmt reg ri -> CMP fmt (env reg) (fixRI ri)
CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri)
- BCC cond lbl -> BCC cond lbl
- BCCFAR cond lbl -> BCCFAR cond lbl
+ BCC cond lbl p -> BCC cond lbl p
+ BCCFAR cond lbl p -> BCCFAR cond lbl p
MTCTR reg -> MTCTR (env reg)
BCTR targets lbl -> BCTR targets lbl
BL imm argRegs -> BL imm argRegs -- argument regs
@@ -417,6 +433,7 @@ ppc_patchRegsOfInstr instr env
AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3)
+ NAND reg1 reg2 reg3 -> NAND (env reg1) (env reg2) (env reg3)
OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm
XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
@@ -480,8 +497,8 @@ ppc_isJumpishInstr instr
ppc_jumpDestsOfInstr :: Instr -> [BlockId]
ppc_jumpDestsOfInstr insn
= case insn of
- BCC _ id -> [id]
- BCCFAR _ id -> [id]
+ BCC _ id _ -> [id]
+ BCCFAR _ id _ -> [id]
BCTR targets _ -> [id | Just id <- targets]
_ -> []
@@ -492,8 +509,8 @@ ppc_jumpDestsOfInstr insn
ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
ppc_patchJumpInstr insn patchF
= case insn of
- BCC cc id -> BCC cc (patchF id)
- BCCFAR cc id -> BCCFAR cc (patchF id)
+ BCC cc id p -> BCC cc (patchF id) p
+ BCCFAR cc id p -> BCCFAR cc (patchF id) p
BCTR ids lbl -> BCTR (map (fmap patchF) ids) lbl
_ -> insn
@@ -631,16 +648,12 @@ ppc_mkRegRegMoveInstr src dst
-- | Make an unconditional jump instruction.
--- For architectures with branch delay slots, its ok to put
--- a NOP after the jump. Don't fill the delay slot with an
--- instruction that references regs or you'll confuse the
--- linear allocator.
ppc_mkJumpInstr
:: BlockId
-> [Instr]
ppc_mkJumpInstr id
- = [BCC ALWAYS id]
+ = [BCC ALWAYS id Nothing]
-- | Take the source and destination from this reg -> reg move instruction
@@ -669,12 +682,12 @@ makeFarBranches info_env blocks
handleBlock addr (BasicBlock id instrs)
= BasicBlock id (zipWith makeFar [addr..] instrs)
- makeFar _ (BCC ALWAYS tgt) = BCC ALWAYS tgt
- makeFar addr (BCC cond tgt)
+ makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing
+ makeFar addr (BCC cond tgt p)
| abs (addr - targetAddr) >= nearLimit
- = BCCFAR cond tgt
+ = BCCFAR cond tgt p
| otherwise
- = BCC cond tgt
+ = BCC cond tgt p
where Just targetAddr = lookupUFM blockAddressMap tgt
makeFar _ other = other
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index 63d01c3913..2f64d82ee5 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -9,6 +9,8 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module PPC.Ppr (pprNatCmmDecl) where
+import GhcPrelude
+
import PPC.Regs
import PPC.Instr
import PPC.Cond
@@ -23,9 +25,10 @@ import Cmm hiding (topInfoTable)
import Hoopl.Collections
import Hoopl.Label
+import BlockId
import CLabel
-import Unique ( pprUniqueAlways, Uniquable(..) )
+import Unique ( pprUniqueAlways, getUnique )
import Platform
import FastString
import Outputable
@@ -78,19 +81,17 @@ pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
pprFunctionDescriptor :: CLabel -> SDoc
pprFunctionDescriptor lab = pprGloblDecl lab
- $$ text ".section \".opd\",\"aw\""
- $$ text ".align 3"
+ $$ text "\t.section \".opd\", \"aw\""
+ $$ text "\t.align 3"
$$ ppr lab <> char ':'
- $$ text ".quad ."
- <> ppr lab
- <> text ",.TOC.@tocbase,0"
- $$ text ".previous"
- $$ text ".type "
- <> ppr lab
- <> text ", @function"
- $$ char '.'
- <> ppr lab
- <> char ':'
+ $$ text "\t.quad ."
+ <> ppr lab
+ <> text ",.TOC.@tocbase,0"
+ $$ text "\t.previous"
+ $$ text "\t.type"
+ <+> ppr lab
+ <> text ", @function"
+ $$ char '.' <> ppr lab <> char ':'
pprFunctionPrologue :: CLabel ->SDoc
pprFunctionPrologue lab = pprGloblDecl lab
@@ -108,7 +109,7 @@ pprFunctionPrologue lab = pprGloblDecl lab
pprBasicBlock :: LabelMap CmmStatics -> NatBasicBlock Instr -> SDoc
pprBasicBlock info_env (BasicBlock blockid instrs)
= maybe_infotable $$
- pprLabel (mkAsmTempLabel (getUnique blockid)) $$
+ pprLabel (blockLbl blockid) $$
vcat (map pprInstr instrs)
where
maybe_infotable = case mapLookup blockid info_env of
@@ -310,11 +311,13 @@ pprImm (HIGHESTA i)
pprAddr :: AddrMode -> SDoc
pprAddr (AddrRegReg r1 r2)
- = pprReg r1 <+> text ", " <+> pprReg r2
-
-pprAddr (AddrRegImm r1 (ImmInt i)) = hcat [ int i, char '(', pprReg r1, char ')' ]
-pprAddr (AddrRegImm r1 (ImmInteger i)) = hcat [ integer i, char '(', pprReg r1, char ')' ]
-pprAddr (AddrRegImm r1 imm) = hcat [ pprImm imm, char '(', pprReg r1, char ')' ]
+ = pprReg r1 <> char ',' <+> pprReg r2
+pprAddr (AddrRegImm r1 (ImmInt i))
+ = hcat [ int i, char '(', pprReg r1, char ')' ]
+pprAddr (AddrRegImm r1 (ImmInteger i))
+ = hcat [ integer i, char '(', pprReg r1, char ')' ]
+pprAddr (AddrRegImm r1 imm)
+ = hcat [ pprImm imm, char '(', pprReg r1, char ')' ]
pprSectionAlign :: Section -> SDoc
@@ -450,15 +453,27 @@ pprInstr (LD fmt reg addr) = hcat [
text ", ",
pprAddr addr
]
+
pprInstr (LDFAR fmt reg (AddrRegImm source off)) =
sdocWithPlatform $ \platform -> vcat [
pprInstr (ADDIS (tmpReg platform) source (HA off)),
pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off)))
]
-
pprInstr (LDFAR _ _ _) =
panic "PPC.Ppr.pprInstr LDFAR: no match"
+pprInstr (LDR fmt reg1 addr) = hcat [
+ text "\tl",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC.Ppr.Instr LDR: no match",
+ text "arx\t",
+ pprReg reg1,
+ text ", ",
+ pprAddr addr
+ ]
+
pprInstr (LA fmt reg addr) = hcat [
char '\t',
text "l",
@@ -508,6 +523,17 @@ pprInstr (STU fmt reg addr) = hcat [
text ", ",
pprAddr addr
]
+pprInstr (STC fmt reg1 addr) = hcat [
+ text "\tst",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC.Ppr.Instr STC: no match",
+ text "cx.\t",
+ pprReg reg1,
+ text ", ",
+ pprAddr addr
+ ]
pprInstr (LIS reg imm) = hcat [
char '\t',
text "lis",
@@ -569,19 +595,25 @@ pprInstr (CMPL fmt reg ri) = hcat [
RIReg _ -> empty
RIImm _ -> char 'i'
]
-pprInstr (BCC cond blockid) = hcat [
+pprInstr (BCC cond blockid prediction) = hcat [
char '\t',
text "b",
pprCond cond,
+ pprPrediction prediction,
char '\t',
ppr lbl
]
- where lbl = mkAsmTempLabel (getUnique blockid)
+ where lbl = mkLocalBlockLabel (getUnique blockid)
+ pprPrediction p = case p of
+ Nothing -> empty
+ Just True -> char '+'
+ Just False -> char '-'
-pprInstr (BCCFAR cond blockid) = vcat [
+pprInstr (BCCFAR cond blockid prediction) = vcat [
hcat [
text "\tb",
pprCond (condNegate cond),
+ neg_prediction,
text "\t$+8"
],
hcat [
@@ -589,7 +621,11 @@ pprInstr (BCCFAR cond blockid) = vcat [
ppr lbl
]
]
- where lbl = mkAsmTempLabel (getUnique blockid)
+ where lbl = mkLocalBlockLabel (getUnique blockid)
+ neg_prediction = case prediction of
+ Nothing -> empty
+ Just True -> char '-'
+ Just False -> char '+'
pprInstr (JMP lbl)
-- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL"
@@ -741,6 +777,7 @@ pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
]
pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri
pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3)
+pprInstr (NAND reg1 reg2 reg3) = pprLogic (sLit "nand") reg1 reg2 (RIReg reg3)
pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri
pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri
@@ -922,6 +959,10 @@ pprInstr (FETCHPC reg) = vcat [
hcat [ text "1:\tmflr\t", pprReg reg ]
]
+pprInstr HWSYNC = text "\tsync"
+
+pprInstr ISYNC = text "\tisync"
+
pprInstr LWSYNC = text "\tlwsync"
pprInstr NOP = text "\tnop"
diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs
index c4724d4193..30a07b9440 100644
--- a/compiler/nativeGen/PPC/RegInfo.hs
+++ b/compiler/nativeGen/PPC/RegInfo.hs
@@ -20,6 +20,8 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
+import GhcPrelude
+
import PPC.Instr
import BlockId
@@ -49,14 +51,14 @@ shortcutStatics fn (Statics lbl statics)
shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
- | Just uq <- maybeAsmTemp lab = shortBlockId fn (mkBlockId uq)
- | otherwise = lab
+ | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId
+ | otherwise = lab
shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn (CmmStaticLit (CmmLabel lab))
= CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
- = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
+ = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
-- slightly dodgy, we're ignoring the second label, but this
-- works with the way we use CmmLabelDiffOff for jump tables now.
shortcutStatic _ other_static
@@ -69,6 +71,6 @@ shortBlockId
shortBlockId fn blockid =
case fn blockid of
- Nothing -> mkAsmTempLabel uq
+ Nothing -> mkLocalBlockLabel uq
Just (DestBlockId blockid') -> shortBlockId fn blockid'
where uq = getUnique blockid
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
index a1befc7837..227517be88 100644
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -50,6 +50,8 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
+import GhcPrelude
+
import Reg
import RegClass
import Format
@@ -70,7 +72,7 @@ import Data.Int ( Int8, Int16, Int32, Int64 )
-- squeese functions for the graph allocator -----------------------------------
-- | regSqueeze_class reg
--- Calculuate the maximum number of register colors that could be
+-- Calculate the maximum number of register colors that could be
-- denied to a node of this class due to having this reg
-- as a neighbour.
--
@@ -163,7 +165,7 @@ litToImm (CmmFloat f W32) = ImmFloat f
litToImm (CmmFloat f W64) = ImmDouble f
litToImm (CmmLabel l) = ImmCLbl l
litToImm (CmmLabelOff l off) = ImmIndex l off
-litToImm (CmmLabelDiffOff l1 l2 off)
+litToImm (CmmLabelDiffOff l1 l2 off _)
= ImmConstantSum
(ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
(ImmInt off)
diff --git a/compiler/nativeGen/PprBase.hs b/compiler/nativeGen/PprBase.hs
index aca427449d..d96b18783d 100644
--- a/compiler/nativeGen/PprBase.hs
+++ b/compiler/nativeGen/PprBase.hs
@@ -16,6 +16,8 @@ module PprBase (
where
+import GhcPrelude
+
import AsmUtils
import CLabel
import Cmm
diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs
index 598074d881..d9d56d47c4 100644
--- a/compiler/nativeGen/Reg.hs
+++ b/compiler/nativeGen/Reg.hs
@@ -26,6 +26,8 @@ module Reg (
where
+import GhcPrelude
+
import Outputable
import Unique
import RegClass
diff --git a/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs b/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs
index 6771e4ecb9..634e61cb13 100644
--- a/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs
@@ -21,6 +21,8 @@ module RegAlloc.Graph.ArchBase (
bound,
squeese
) where
+import GhcPrelude
+
import UniqSet
import UniqFM
import Unique
diff --git a/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs b/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs
index 439899071a..0472e4cf09 100644
--- a/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs
@@ -14,9 +14,14 @@ module RegAlloc.Graph.ArchX86 (
worst,
squeese,
) where
+
+import GhcPrelude
+
import RegAlloc.Graph.ArchBase (Reg(..), RegSub(..), RegClass(..))
import UniqSet
+import qualified Data.Array as A
+
-- | Determine the class of a register
classOfReg :: Reg -> RegClass
@@ -57,18 +62,28 @@ regName :: Reg -> Maybe String
regName reg
= case reg of
Reg ClassG32 i
- | i <= 7-> Just $ [ "eax", "ebx", "ecx", "edx"
- , "ebp", "esi", "edi", "esp" ] !! i
+ | i <= 7 ->
+ let names = A.listArray (0,8)
+ [ "eax", "ebx", "ecx", "edx"
+ , "ebp", "esi", "edi", "esp" ]
+ in Just $ names A.! i
RegSub SubL16 (Reg ClassG32 i)
- | i <= 7 -> Just $ [ "ax", "bx", "cx", "dx"
- , "bp", "si", "di", "sp"] !! i
+ | i <= 7 ->
+ let names = A.listArray (0,8)
+ [ "ax", "bx", "cx", "dx"
+ , "bp", "si", "di", "sp"]
+ in Just $ names A.! i
RegSub SubL8 (Reg ClassG32 i)
- | i <= 3 -> Just $ [ "al", "bl", "cl", "dl"] !! i
+ | i <= 3 ->
+ let names = A.listArray (0,4) [ "al", "bl", "cl", "dl"]
+ in Just $ names A.! i
RegSub SubL8H (Reg ClassG32 i)
- | i <= 3 -> Just $ [ "ah", "bh", "ch", "dh"] !! i
+ | i <= 3 ->
+ let names = A.listArray (0,4) [ "ah", "bh", "ch", "dh"]
+ in Just $ names A.! i
_ -> Nothing
diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
index 7e8047f29f..5ca2412c73 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
@@ -3,6 +3,8 @@ module RegAlloc.Graph.Coalesce (
regCoalesce,
slurpJoinMovs
) where
+import GhcPrelude
+
import RegAlloc.Liveness
import Instruction
import Reg
@@ -14,8 +16,6 @@ import UniqFM
import UniqSet
import UniqSupply
-import Data.List
-
-- | Do register coalescing on this top level thing
--
@@ -62,7 +62,7 @@ sinkReg fm r
-- | Slurp out mov instructions that only serve to join live ranges.
--
--- During a mov, if the source reg dies and the destiation reg is
+-- During a mov, if the source reg dies and the destination reg is
-- born then we can rename the two regs to the same thing and
-- eliminate the move.
slurpJoinMovs
diff --git a/compiler/nativeGen/RegAlloc/Graph/Main.hs b/compiler/nativeGen/RegAlloc/Graph/Main.hs
index 08538453f7..4c17d930ea 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Main.hs
@@ -4,6 +4,8 @@
module RegAlloc.Graph.Main (
regAlloc
) where
+import GhcPrelude
+
import qualified GraphColor as Color
import RegAlloc.Liveness
import RegAlloc.Graph.Spill
@@ -25,7 +27,6 @@ import UniqSet
import UniqSupply
import Util (seqList)
-import Data.List
import Data.Maybe
import Control.Monad
diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs
index 0014ab6fed..bce24bdd3c 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Spill.hs
@@ -7,6 +7,8 @@ module RegAlloc.Graph.Spill (
SpillStats(..),
accSpillSL
) where
+import GhcPrelude
+
import RegAlloc.Liveness
import Instruction
import Reg
@@ -34,7 +36,7 @@ import qualified Data.IntSet as IntSet
-- TODO: See if we can split some of the live ranges instead of just globally
-- spilling the virtual reg. This might make the spill cleaner's job easier.
--
--- TODO: On CISCy x86 and x86_64 we don't nessesarally have to add a mov instruction
+-- TODO: On CISCy x86 and x86_64 we don't necessarily have to add a mov instruction
-- when making spills. If an instr is using a spilled virtual we may be able to
-- address the spill slot directly.
--
@@ -111,8 +113,8 @@ regSpill_top platform regSlotMap cmm
-- after we've done a successful allocation.
let liveSlotsOnEntry' :: BlockMap IntSet
liveSlotsOnEntry'
- = mapFoldWithKey patchLiveSlot
- liveSlotsOnEntry liveVRegsOnEntry
+ = mapFoldlWithKey patchLiveSlot
+ liveSlotsOnEntry liveVRegsOnEntry
let info'
= LiveInfo static firstId
@@ -129,10 +131,9 @@ regSpill_top platform regSlotMap cmm
-- then record the fact that these slots are now live in those blocks
-- in the given slotmap.
patchLiveSlot
- :: BlockId -> RegSet
- -> BlockMap IntSet -> BlockMap IntSet
+ :: BlockMap IntSet -> BlockId -> RegSet -> BlockMap IntSet
- patchLiveSlot blockId regsLive slotMap
+ patchLiveSlot slotMap blockId regsLive
= let
-- Slots that are already recorded as being live.
curSlotsLive = fromMaybe IntSet.empty
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
index faef4037c2..50001d7334 100644
--- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
@@ -28,6 +28,8 @@
module RegAlloc.Graph.SpillClean (
cleanSpills
) where
+import GhcPrelude
+
import RegAlloc.Liveness
import Instruction
import Reg
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
index 9811f1a64b..f603b609df 100644
--- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
@@ -13,6 +13,8 @@ module RegAlloc.Graph.SpillCost (
lifeMapFromSpillCostInfo
) where
+import GhcPrelude
+
import RegAlloc.Liveness
import Instruction
import RegClass
diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
index 71956025b0..487e3ee03a 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
@@ -16,6 +16,8 @@ module RegAlloc.Graph.Stats (
#include "nativeGen/NCG.h"
+import GhcPrelude
+
import qualified GraphColor as Color
import RegAlloc.Liveness
import RegAlloc.Graph.Spill
@@ -32,9 +34,6 @@ import UniqFM
import UniqSet
import State
-import Data.List
-
-
-- | Holds interesting statistics from the register allocator.
data RegAllocStats statics instr
@@ -265,8 +264,8 @@ pprStatsConflict stats
$$ text "\n")
--- | For every vreg, dump it's how many conflicts it has and its lifetime
--- good for making a scatter plot.
+-- | For every vreg, dump how many conflicts it has, and its lifetime.
+-- Good for making a scatter plot.
pprStatsLifeConflict
:: [RegAllocStats statics instr]
-> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph
@@ -277,10 +276,10 @@ pprStatsLifeConflict stats graph
$ foldl' plusSpillCostInfo zeroSpillCostInfo
$ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ]
- scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of
- Just (_, l) -> l
- Nothing -> 0
- Just node = Color.lookupNode graph r
+ scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of
+ Just (_, l) -> l
+ Nothing -> 0
+ Just node = Color.lookupNode graph r
in parens $ hcat $ punctuate (text ", ")
[ doubleQuotes $ ppr $ Color.nodeId node
, ppr $ sizeUniqSet (Color.nodeConflicts node)
diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
index 204de846ae..7774985dce 100644
--- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
@@ -8,6 +8,8 @@ where
#include "HsVersions.h"
+import GhcPrelude
+
import RegClass
import Reg
diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs
index d4f124e297..1172870729 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Base.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Base.hs
@@ -17,6 +17,8 @@ module RegAlloc.Linear.Base (
where
+import GhcPrelude
+
import RegAlloc.Linear.StackMap
import RegAlloc.Liveness
import Reg
diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
index 9933f5bb49..b4e79432d8 100644
--- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
@@ -9,6 +9,8 @@ module RegAlloc.Linear.FreeRegs (
where
+import GhcPrelude
+
import Reg
import RegClass
diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
index c262b2b059..89f496c409 100644
--- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
@@ -9,6 +9,8 @@
--
module RegAlloc.Linear.JoinToTargets (joinToTargets) where
+import GhcPrelude
+
import RegAlloc.Linear.State
import RegAlloc.Linear.Base
import RegAlloc.Linear.FreeRegs
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index 2ba682ad17..6171d8d20d 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -102,6 +102,8 @@ module RegAlloc.Linear.Main (
#include "HsVersions.h"
+import GhcPrelude
+
import RegAlloc.Linear.State
import RegAlloc.Linear.Base
import RegAlloc.Linear.StackMap
@@ -496,7 +498,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
-- debugging
{- freeregs <- getFreeRegsR
assig <- getAssigR
- pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform }) trace "genRaInsn"
+ pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform } undefined) trace "genRaInsn"
(ppr instr
$$ text "r_dying = " <+> ppr r_dying
$$ text "w_dying = " <+> ppr w_dying
@@ -807,27 +809,29 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
-- case (3): we need to push something out to free up a register
[] ->
- do let keep' = map getUnique keep
+ do let inRegOrBoth (InReg _) = True
+ inRegOrBoth (InBoth _ _) = True
+ inRegOrBoth _ = False
+ let candidates' =
+ flip delListFromUFM keep $
+ filterUFM inRegOrBoth $
+ assig
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+ let candidates = nonDetUFMToList candidates'
-- the vregs we could kick out that are already in a slot
let candidates_inBoth
= [ (temp, reg, mem)
- | (temp, InBoth reg mem) <- nonDetUFMToList assig
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- , temp `notElem` keep'
+ | (temp, InBoth reg mem) <- candidates
, targetClassOfRealReg platform reg == classOfVirtualReg r ]
-- the vregs we could kick out that are only in a reg
-- this would require writing the reg to a new slot before using it.
let candidates_inReg
= [ (temp, reg)
- | (temp, InReg reg) <- nonDetUFMToList assig
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- , temp `notElem` keep'
+ | (temp, InReg reg) <- candidates
, targetClassOfRealReg platform reg == classOfVirtualReg r ]
let result
diff --git a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
index 5d369249c7..581548212a 100644
--- a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
@@ -2,6 +2,8 @@
module RegAlloc.Linear.PPC.FreeRegs
where
+import GhcPrelude
+
import PPC.Regs
import RegClass
import Reg
diff --git a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
index db4d6ba376..653b2707c9 100644
--- a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
@@ -3,6 +3,8 @@
module RegAlloc.Linear.SPARC.FreeRegs
where
+import GhcPrelude
+
import SPARC.Regs
import RegClass
import Reg
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
index 748fb98c30..95819c6fb3 100644
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
@@ -20,6 +20,8 @@ module RegAlloc.Linear.StackMap (
where
+import GhcPrelude
+
import DynFlags
import UniqFM
import Unique
diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs
index 8b17d3ab88..6554188f41 100644
--- a/compiler/nativeGen/RegAlloc/Linear/State.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/State.hs
@@ -31,6 +31,8 @@ module RegAlloc.Linear.State (
)
where
+import GhcPrelude
+
import RegAlloc.Linear.Stats
import RegAlloc.Linear.StackMap
import RegAlloc.Linear.Base
diff --git a/compiler/nativeGen/RegAlloc/Linear/Stats.hs b/compiler/nativeGen/RegAlloc/Linear/Stats.hs
index 71dedaeb55..74f3c834d0 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Stats.hs
@@ -6,6 +6,8 @@ module RegAlloc.Linear.Stats (
where
+import GhcPrelude
+
import RegAlloc.Linear.Base
import RegAlloc.Liveness
import Instruction
@@ -13,7 +15,6 @@ import Instruction
import UniqFM
import Outputable
-import Data.List
import State
-- | Build a map of how many times each reg was alloced, clobbered, loaded etc.
diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
index ae4aa53254..65a566d1c3 100644
--- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
@@ -3,6 +3,8 @@
module RegAlloc.Linear.X86.FreeRegs
where
+import GhcPrelude
+
import X86.Regs
import RegClass
import Reg
diff --git a/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs
index 5a7f71e3f0..713b053356 100644
--- a/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs
@@ -3,6 +3,8 @@
module RegAlloc.Linear.X86_64.FreeRegs
where
+import GhcPrelude
+
import X86.Regs
import RegClass
import Reg
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index e66139786b..9d93564317 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -35,6 +35,8 @@ module RegAlloc.Liveness (
regLiveness,
natCmmTopToLive
) where
+import GhcPrelude
+
import Reg
import Instruction
@@ -145,10 +147,10 @@ instance Instruction instr => Instruction (InstrSR instr) where
mkJumpInstr target = map Instr (mkJumpInstr target)
mkStackAllocInstr platform amount =
- Instr (mkStackAllocInstr platform amount)
+ Instr <$> mkStackAllocInstr platform amount
mkStackDeallocInstr platform amount =
- Instr (mkStackDeallocInstr platform amount)
+ Instr <$> mkStackDeallocInstr platform amount
-- | An instruction with liveness information.
@@ -812,7 +814,7 @@ computeLiveness
computeLiveness platform sccs
= case checkIsReverseDependent sccs of
Nothing -> livenessSCCs platform mapEmpty [] sccs
- Just bad -> pprPanic "RegAlloc.Liveness.computeLivenss"
+ Just bad -> pprPanic "RegAlloc.Liveness.computeLiveness"
(vcat [ text "SCCs aren't in reverse dependent order"
, text "bad blockId" <+> ppr bad
, ppr sccs])
@@ -1006,5 +1008,3 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
r_dying_br = nonDetEltsUniqSet (mkUniqSet r_dying `unionUniqSets`
live_branch_only)
-- See Note [Unique Determinism and code generation]
-
-
diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs
index 0c793173cb..cd008bbbb1 100644
--- a/compiler/nativeGen/RegClass.hs
+++ b/compiler/nativeGen/RegClass.hs
@@ -4,6 +4,8 @@ module RegClass
where
+import GhcPrelude
+
import Outputable
import Unique
diff --git a/compiler/nativeGen/SPARC/AddrMode.hs b/compiler/nativeGen/SPARC/AddrMode.hs
index bf4d480005..ee40843351 100644
--- a/compiler/nativeGen/SPARC/AddrMode.hs
+++ b/compiler/nativeGen/SPARC/AddrMode.hs
@@ -6,6 +6,8 @@ module SPARC.AddrMode (
where
+import GhcPrelude
+
import SPARC.Imm
import SPARC.Base
import Reg
diff --git a/compiler/nativeGen/SPARC/Base.hs b/compiler/nativeGen/SPARC/Base.hs
index a57d5e1c9a..04e8fed2b3 100644
--- a/compiler/nativeGen/SPARC/Base.hs
+++ b/compiler/nativeGen/SPARC/Base.hs
@@ -18,6 +18,8 @@ module SPARC.Base (
where
+import GhcPrelude
+
import DynFlags
import Panic
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 71d320fa63..a95a22274b 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -22,6 +22,8 @@ where
#include "../includes/MachDeps.h"
-- NCG stuff:
+import GhcPrelude
+
import SPARC.Base
import SPARC.CodeGen.Sanity
import SPARC.CodeGen.Amode
@@ -58,7 +60,6 @@ import FastString
import OrdList
import Outputable
import Platform
-import Unique
import Control.Monad ( mapAndUnzipM )
@@ -162,7 +163,7 @@ stmtToInstrs stmt = do
{-
-Now, given a tree (the argument to an CmmLoad) that references memory,
+Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.
A Rule of the Game (tm) for Amodes: use of the addr bit must
@@ -185,7 +186,7 @@ temporary, then do the other computation, and then use the temporary:
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = mkAsmTempLabel (getUnique blockid)
+ where blockLabel = blockLbl blockid
@@ -313,7 +314,7 @@ genCondJump bid bool = do
genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
genSwitch dflags expr targets
- | gopt Opt_PIC dflags
+ | positionIndependent dflags
= error "MachCodeGen: sparc genSwitch PIC not finished\n"
| otherwise
@@ -422,7 +423,10 @@ genCCall target dest_regs args
return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
ForeignTarget expr _
- -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+ -> do (dyn_c, dyn_rs) <- arg_to_int_vregs expr
+ let dyn_r = case dyn_rs of
+ [dyn_r'] -> dyn_r'
+ _ -> panic "SPARC.CodeGen.genCCall: arg_to_int"
return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
PrimTarget mop
@@ -432,7 +436,10 @@ genCCall target dest_regs args
return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
Right mopExpr -> do
- (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+ (dyn_c, dyn_rs) <- arg_to_int_vregs mopExpr
+ let dyn_r = case dyn_rs of
+ [dyn_r'] -> dyn_r'
+ _ -> panic "SPARC.CodeGen.genCCall: arg_to_int"
return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
return lblOrMopExpr
@@ -626,6 +633,10 @@ outOfLineMachOp_table mop
MO_F32_Cosh -> fsLit "coshf"
MO_F32_Tanh -> fsLit "tanhf"
+ MO_F32_Asinh -> fsLit "asinhf"
+ MO_F32_Acosh -> fsLit "acoshf"
+ MO_F32_Atanh -> fsLit "atanhf"
+
MO_F64_Exp -> fsLit "exp"
MO_F64_Log -> fsLit "log"
MO_F64_Sqrt -> fsLit "sqrt"
@@ -644,14 +655,21 @@ outOfLineMachOp_table mop
MO_F64_Cosh -> fsLit "cosh"
MO_F64_Tanh -> fsLit "tanh"
+ MO_F64_Asinh -> fsLit "asinh"
+ MO_F64_Acosh -> fsLit "acosh"
+ MO_F64_Atanh -> fsLit "atanh"
+
MO_UF_Conv w -> fsLit $ word2FloatLabel w
MO_Memcpy _ -> fsLit "memcpy"
MO_Memset _ -> fsLit "memset"
MO_Memmove _ -> fsLit "memmove"
+ MO_Memcmp _ -> fsLit "memcmp"
MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
+ MO_Pdep w -> fsLit $ pdepLabel w
+ MO_Pext w -> fsLit $ pextLabel w
MO_Clz w -> fsLit $ clzLabel w
MO_Ctz w -> fsLit $ ctzLabel w
MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
@@ -663,6 +681,7 @@ outOfLineMachOp_table mop
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
MO_Add2 {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
diff --git a/compiler/nativeGen/SPARC/CodeGen/Amode.hs b/compiler/nativeGen/SPARC/CodeGen/Amode.hs
index a59287f171..33e3f535da 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Amode.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Amode.hs
@@ -4,6 +4,8 @@ module SPARC.CodeGen.Amode (
where
+import GhcPrelude
+
import {-# SOURCE #-} SPARC.CodeGen.Gen32
import SPARC.CodeGen.Base
import SPARC.AddrMode
diff --git a/compiler/nativeGen/SPARC/CodeGen/Base.hs b/compiler/nativeGen/SPARC/CodeGen/Base.hs
index 27b533f46b..039bb6496c 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Base.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Base.hs
@@ -13,6 +13,8 @@ module SPARC.CodeGen.Base (
where
+import GhcPrelude
+
import SPARC.Instr
import SPARC.Cond
import SPARC.AddrMode
diff --git a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs b/compiler/nativeGen/SPARC/CodeGen/CondCode.hs
index e5fb82df4d..e6b2e174b6 100644
--- a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/CondCode.hs
@@ -6,6 +6,8 @@ module SPARC.CodeGen.CondCode (
where
+import GhcPrelude
+
import {-# SOURCE #-} SPARC.CodeGen.Gen32
import SPARC.CodeGen.Base
import SPARC.Instr
diff --git a/compiler/nativeGen/SPARC/CodeGen/Expand.hs b/compiler/nativeGen/SPARC/CodeGen/Expand.hs
index 70cb0111c0..0b318740db 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Expand.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Expand.hs
@@ -5,6 +5,8 @@ module SPARC.CodeGen.Expand (
where
+import GhcPrelude
+
import SPARC.Instr
import SPARC.Imm
import SPARC.AddrMode
@@ -140,7 +142,7 @@ expandMisalignedDoubles instr
--- | The the high partner for this float reg.
+-- | The high partner for this float reg.
fRegHi :: Reg -> Reg
fRegHi (RegReal (RealRegSingle r1))
| r1 >= 32
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
index a0e86f14c4..a7a1f60416 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
@@ -6,6 +6,8 @@ module SPARC.CodeGen.Gen32 (
where
+import GhcPrelude
+
import SPARC.CodeGen.CondCode
import SPARC.CodeGen.Amode
import SPARC.CodeGen.Gen64
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs
index f186d437d0..6fa7482f9b 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs
@@ -7,6 +7,8 @@ module SPARC.CodeGen.Gen64 (
where
+import GhcPrelude
+
import {-# SOURCE #-} SPARC.CodeGen.Gen32
import SPARC.CodeGen.Base
import SPARC.CodeGen.Amode
@@ -191,6 +193,24 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
return $ ChildCode64 code r_dst_lo
+-- only W32 supported for now
+iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr])
+ = do
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+
+ -- compute expr and load it into r_dst_lo
+ (a_reg, a_code) <- getSomeReg expr
+
+ dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ code = a_code
+ `appOL` toOL
+ [ SRA a_reg (RIImm (ImmInt 31)) r_dst_hi
+ , mkRegRegMoveInstr platform a_reg r_dst_lo ]
+
+ return $ ChildCode64 code r_dst_lo
+
iselExpr64 expr
= pprPanic "iselExpr64(sparc)" (ppr expr)
diff --git a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs b/compiler/nativeGen/SPARC/CodeGen/Sanity.hs
index 81641326f2..fcf5b65bde 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Sanity.hs
@@ -6,6 +6,8 @@ module SPARC.CodeGen.Sanity (
where
+import GhcPrelude
+
import SPARC.Instr
import SPARC.Ppr ()
import Instruction
diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs
index da41457950..3fbfb8603f 100644
--- a/compiler/nativeGen/SPARC/Cond.hs
+++ b/compiler/nativeGen/SPARC/Cond.hs
@@ -7,6 +7,8 @@ module SPARC.Cond (
where
+import GhcPrelude
+
-- | Branch condition codes.
data Cond
= ALWAYS
diff --git a/compiler/nativeGen/SPARC/Imm.hs b/compiler/nativeGen/SPARC/Imm.hs
index cb53ba411c..bd2d4ab131 100644
--- a/compiler/nativeGen/SPARC/Imm.hs
+++ b/compiler/nativeGen/SPARC/Imm.hs
@@ -7,6 +7,8 @@ module SPARC.Imm (
where
+import GhcPrelude
+
import Cmm
import CLabel
@@ -57,7 +59,7 @@ litToImm lit
CmmLabel l -> ImmCLbl l
CmmLabelOff l off -> ImmIndex l off
- CmmLabelDiffOff l1 l2 off
+ CmmLabelDiffOff l1 l2 off _
-> ImmConstantSum
(ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
(ImmInt off)
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index 4c19ac69a7..54fb513478 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -25,6 +25,8 @@ module SPARC.Instr (
where
+import GhcPrelude
+
import SPARC.Stack
import SPARC.Imm
import SPARC.AddrMode
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
index 88b04b952a..eb401fff06 100644
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -25,6 +25,8 @@ where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
+import GhcPrelude
+
import SPARC.Regs
import SPARC.Instr
import SPARC.Cond
@@ -38,11 +40,12 @@ import PprBase
import Cmm hiding (topInfoTable)
import PprCmm()
+import BlockId
import CLabel
import Hoopl.Label
import Hoopl.Collections
-import Unique ( Uniquable(..), pprUniqueAlways )
+import Unique ( pprUniqueAlways )
import Outputable
import Platform
import FastString
@@ -91,7 +94,7 @@ dspSection = Section Text $
pprBasicBlock :: LabelMap CmmStatics -> NatBasicBlock Instr -> SDoc
pprBasicBlock info_env (BasicBlock blockid instrs)
= maybe_infotable $$
- pprLabel (mkAsmTempLabel (getUnique blockid)) $$
+ pprLabel (blockLbl blockid) $$
vcat (map pprInstr instrs)
where
maybe_infotable = case mapLookup blockid info_env of
@@ -402,7 +405,7 @@ pprInstr (LD format addr reg)
pprReg reg
]
--- 64 bit FP storees are expanded into individual instructions in CodeGen.Expand
+-- 64 bit FP stores are expanded into individual instructions in CodeGen.Expand
pprInstr (ST FF64 reg _)
| RegReal (RealRegSingle{}) <- reg
= panic "SPARC.Ppr: not emitting potentially misaligned ST FF64 instr"
@@ -541,7 +544,7 @@ pprInstr (BI cond b blockid)
text "\tb", pprCond cond,
if b then pp_comma_a else empty,
char '\t',
- ppr (mkAsmTempLabel (getUnique blockid))
+ ppr (blockLbl blockid)
]
pprInstr (BF cond b blockid)
@@ -549,7 +552,7 @@ pprInstr (BF cond b blockid)
text "\tfb", pprCond cond,
if b then pp_comma_a else empty,
char '\t',
- ppr (mkAsmTempLabel (getUnique blockid))
+ ppr (blockLbl blockid)
]
pprInstr (JMP addr) = text "\tjmp\t" <> pprAddr addr
diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs
index 14a5192c2d..d6aadbae94 100644
--- a/compiler/nativeGen/SPARC/Regs.hs
+++ b/compiler/nativeGen/SPARC/Regs.hs
@@ -32,6 +32,8 @@ module SPARC.Regs (
where
+import GhcPrelude
+
import CodeGen.Platform.SPARC
import Reg
import RegClass
@@ -75,7 +77,7 @@ classOfRealReg reg
-- | regSqueeze_class reg
--- Calculuate the maximum number of register colors that could be
+-- Calculate the maximum number of register colors that could be
-- denied to a node of this class due to having this reg
-- as a neighbour.
--
@@ -191,7 +193,7 @@ fPair reg
-- | All the regs that the register allocator can allocate to,
--- with the the fixed use regs removed.
+-- with the fixed use regs removed.
--
allocatableRegs :: [RealReg]
allocatableRegs
diff --git a/compiler/nativeGen/SPARC/ShortcutJump.hs b/compiler/nativeGen/SPARC/ShortcutJump.hs
index 123a345130..83e366cb04 100644
--- a/compiler/nativeGen/SPARC/ShortcutJump.hs
+++ b/compiler/nativeGen/SPARC/ShortcutJump.hs
@@ -8,6 +8,8 @@ module SPARC.ShortcutJump (
where
+import GhcPrelude
+
import SPARC.Instr
import SPARC.Imm
@@ -16,8 +18,6 @@ import BlockId
import Cmm
import Panic
-import Unique
-
data JumpDest
@@ -46,14 +46,14 @@ shortcutStatics fn (Statics lbl statics)
shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
- | Just uq <- maybeAsmTemp lab = shortBlockId fn (mkBlockId uq)
- | otherwise = lab
+ | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId
+ | otherwise = lab
shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn (CmmStaticLit (CmmLabel lab))
= CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
- = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
+ = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
-- slightly dodgy, we're ignoring the second label, but this
-- works with the way we use CmmLabelDiffOff for jump tables now.
shortcutStatic _ other_static
@@ -63,7 +63,7 @@ shortcutStatic _ other_static
shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel
shortBlockId fn blockid =
case fn blockid of
- Nothing -> mkAsmTempLabel (getUnique blockid)
+ Nothing -> blockLbl blockid
Just (DestBlockId blockid') -> shortBlockId fn blockid'
Just (DestImm (ImmCLbl lbl)) -> lbl
_other -> panic "shortBlockId"
diff --git a/compiler/nativeGen/SPARC/Stack.hs b/compiler/nativeGen/SPARC/Stack.hs
index 629b18789f..3f5b2a7289 100644
--- a/compiler/nativeGen/SPARC/Stack.hs
+++ b/compiler/nativeGen/SPARC/Stack.hs
@@ -7,6 +7,8 @@ module SPARC.Stack (
where
+import GhcPrelude
+
import SPARC.AddrMode
import SPARC.Regs
import SPARC.Base
diff --git a/compiler/nativeGen/TargetReg.hs b/compiler/nativeGen/TargetReg.hs
index a298cccaf6..6800b9043b 100644
--- a/compiler/nativeGen/TargetReg.hs
+++ b/compiler/nativeGen/TargetReg.hs
@@ -21,6 +21,8 @@ where
#include "HsVersions.h"
+import GhcPrelude
+
import Reg
import RegClass
import Format
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 341fa43dbc..a2e26bd68b 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -2,9 +2,7 @@
-- The default iteration limit is a bit too low for the definitions
-- in this module.
-#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
-#endif
-----------------------------------------------------------------------------
--
@@ -32,6 +30,8 @@ where
#include "../includes/MachDeps.h"
-- NCG stuff:
+import GhcPrelude
+
import X86.Instr
import X86.Cond
import X86.Regs
@@ -65,7 +65,6 @@ import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
import ForeignCall ( CCallConv(..) )
import OrdList
import Outputable
-import Unique
import FastString
import DynFlags
import Util
@@ -211,6 +210,9 @@ stmtToInstrs stmt = do
-> genCCall dflags is32Bit target result_regs args
CmmBranch id -> genBranch id
+
+ --We try to arrange blocks such that the likely branch is the fallthrough
+ --in CmmContFlowOpt. So we can assume the condition is likely false here.
CmmCondBranch arg true false _ -> do
b1 <- genCondJump true arg
b2 <- genBranch false
@@ -295,7 +297,7 @@ data Amode
= Amode AddrMode InstrBlock
{-
-Now, given a tree (the argument to an CmmLoad) that references memory,
+Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.
A Rule of the Game (tm) for Amodes: use of the addr bit must
@@ -328,7 +330,7 @@ is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = mkAsmTempLabel (getUnique blockid)
+ where blockLabel = blockLbl blockid
-- -----------------------------------------------------------------------------
@@ -466,6 +468,20 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
r_dst_lo
)
+iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
+ fn <- getAnyReg expr
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ code = fn r_dst_lo
+ return (
+ ChildCode64 (code `snocOL`
+ MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
+ CLTD II32 `snocOL`
+ MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
+ MOV II32 (OpReg edx) (OpReg r_dst_hi))
+ r_dst_lo
+ )
+
iselExpr64 expr
= pprPanic "iselExpr64(i386)" (ppr expr)
@@ -503,6 +519,9 @@ getRegister' dflags is32Bit (CmmReg reg)
getRegister' dflags is32Bit (CmmRegOff r n)
= getRegister' dflags is32Bit $ mangleIndexTree dflags r n
+getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
+ = addAlignmentCheck align <$> getRegister' dflags is32Bit e
+
-- for 32-bit architectuers, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)
@@ -731,8 +750,10 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_F_Ne _ -> condFltReg is32Bit NE x y
MO_F_Gt _ -> condFltReg is32Bit GTT x y
MO_F_Ge _ -> condFltReg is32Bit GE x y
- MO_F_Lt _ -> condFltReg is32Bit LTT x y
- MO_F_Le _ -> condFltReg is32Bit LE x y
+ -- Invert comparison condition and swap operands
+ -- See Note [SSE Parity Checks]
+ MO_F_Lt _ -> condFltReg is32Bit GTT y x
+ MO_F_Le _ -> condFltReg is32Bit GE y x
MO_Eq _ -> condIntReg EQQ x y
MO_Ne _ -> condIntReg NE x y
@@ -1255,6 +1276,21 @@ isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
|| isSuitableFloatingPointLit lit
isOperand _ _ = False
+-- | Given a 'Register', produce a new 'Register' with an instruction block
+-- which will check the value for alignment. Used for @-falignment-sanitisation@.
+addAlignmentCheck :: Int -> Register -> Register
+addAlignmentCheck align reg =
+ case reg of
+ Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt reg)
+ Any fmt f -> Any fmt (\reg -> f reg `appOL` check fmt reg)
+ where
+ check :: Format -> Reg -> InstrBlock
+ check fmt reg =
+ ASSERT(not $ isFloatFormat fmt)
+ toOL [ TEST fmt (OpImm $ ImmInt $ align-1) (OpReg reg)
+ , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
+ ]
+
memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
lbl <- getNewLabelNat
@@ -1331,15 +1367,17 @@ getCondCode (CmmMachOp mop [x, y])
MO_F_Ne W32 -> condFltCode NE x y
MO_F_Gt W32 -> condFltCode GTT x y
MO_F_Ge W32 -> condFltCode GE x y
- MO_F_Lt W32 -> condFltCode LTT x y
- MO_F_Le W32 -> condFltCode LE x y
+ -- Invert comparison condition and swap operands
+ -- See Note [SSE Parity Checks]
+ MO_F_Lt W32 -> condFltCode GTT y x
+ MO_F_Le W32 -> condFltCode GE y x
MO_F_Eq W64 -> condFltCode EQQ x y
MO_F_Ne W64 -> condFltCode NE x y
MO_F_Gt W64 -> condFltCode GTT x y
MO_F_Ge W64 -> condFltCode GE x y
- MO_F_Lt W64 -> condFltCode LTT x y
- MO_F_Le W64 -> condFltCode LE x y
+ MO_F_Lt W64 -> condFltCode GTT y x
+ MO_F_Le W64 -> condFltCode GE y x
_ -> condIntCode (machOpToCond mop) x y
@@ -1639,11 +1677,19 @@ genCondJump' _ id bool = do
else do
lbl <- getBlockIdNat
- -- see comment with condFltReg
+ -- See Note [SSE Parity Checks]
let code = case cond of
NE -> or_unordered
GU -> plain_test
GEU -> plain_test
+ -- Use ASSERT so we don't break releases if
+ -- LTT/LE creep in somehow.
+ LTT ->
+ ASSERT2(False, ppr "Should have been turned into >")
+ and_ordered
+ LE ->
+ ASSERT2(False, ppr "Should have been turned into >=")
+ and_ordered
_ -> and_ordered
plain_test = unitOL (
@@ -1855,6 +1901,72 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
format = intFormat width
lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
+genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
+ args@[src, mask] = do
+ let platform = targetPlatform dflags
+ if isBmi2Enabled dflags
+ then do code_src <- getAnyReg src
+ code_mask <- getAnyReg mask
+ src_r <- getNewRegNat format
+ mask_r <- getNewRegNat format
+ let dst_r = getRegisterReg platform False (CmmLocal dst)
+ return $ code_src src_r `appOL` code_mask mask_r `appOL`
+ (if width == W8 then
+ -- The PDEP instruction doesn't take a r/m8
+ unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
+ unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
+ unitOL (PDEP II16 (OpReg mask_r) (OpReg src_r ) dst_r)
+ else
+ unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
+ (if width == W8 || width == W16 then
+ -- We used a 16-bit destination register above,
+ -- so zero-extend
+ unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
+ else nilOL)
+ else do
+ targetExpr <- cmmMakeDynamicReference dflags
+ CallReference lbl
+ let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+ [NoHint] [NoHint]
+ CmmMayReturn)
+ genCCall dflags is32Bit target dest_regs args
+ where
+ format = intFormat width
+ lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
+
+genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
+ args@[src, mask] = do
+ let platform = targetPlatform dflags
+ if isBmi2Enabled dflags
+ then do code_src <- getAnyReg src
+ code_mask <- getAnyReg mask
+ src_r <- getNewRegNat format
+ mask_r <- getNewRegNat format
+ let dst_r = getRegisterReg platform False (CmmLocal dst)
+ return $ code_src src_r `appOL` code_mask mask_r `appOL`
+ (if width == W8 then
+ -- The PEXT instruction doesn't take a r/m8
+ unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
+ unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
+ unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r)
+ else
+ unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
+ (if width == W8 || width == W16 then
+ -- We used a 16-bit destination register above,
+ -- so zero-extend
+ unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
+ else nilOL)
+ else do
+ targetExpr <- cmmMakeDynamicReference dflags
+ CallReference lbl
+ let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+ [NoHint] [NoHint]
+ CmmMayReturn)
+ genCCall dflags is32Bit target dest_regs args
+ where
+ format = intFormat width
+ lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
+
genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
| is32Bit && width == W64 = do
-- Fallback to `hs_clz64` on i386
@@ -2129,6 +2241,8 @@ genCCall _ is32Bit target dest_regs args = do
ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
return code
_ -> panic "genCCall: Wrong number of arguments/results for add2"
+ (PrimTarget (MO_AddWordC width), [res_r, res_c]) ->
+ addSubIntC platform ADD_CC (const Nothing) CARRY width res_r res_c args
(PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
(PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
@@ -2645,6 +2759,10 @@ outOfLineCmmOp mop res args
MO_F32_Tanh -> fsLit "tanhf"
MO_F32_Pwr -> fsLit "powf"
+ MO_F32_Asinh -> fsLit "asinhf"
+ MO_F32_Acosh -> fsLit "acoshf"
+ MO_F32_Atanh -> fsLit "atanhf"
+
MO_F64_Sqrt -> fsLit "sqrt"
MO_F64_Fabs -> fsLit "fabs"
MO_F64_Sin -> fsLit "sin"
@@ -2662,15 +2780,23 @@ outOfLineCmmOp mop res args
MO_F64_Tanh -> fsLit "tanh"
MO_F64_Pwr -> fsLit "pow"
+ MO_F64_Asinh -> fsLit "asinh"
+ MO_F64_Acosh -> fsLit "acosh"
+ MO_F64_Atanh -> fsLit "atanh"
+
MO_Memcpy _ -> fsLit "memcpy"
MO_Memset _ -> fsLit "memset"
MO_Memmove _ -> fsLit "memmove"
+ MO_Memcmp _ -> fsLit "memcmp"
MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap"
MO_Clz w -> fsLit $ clzLabel w
MO_Ctz _ -> unsupported
+ MO_Pdep w -> fsLit $ pdepLabel w
+ MO_Pext w -> fsLit $ pextLabel w
+
MO_AtomicRMW _ _ -> fsLit "atomicrmw"
MO_AtomicRead _ -> fsLit "atomicread"
MO_AtomicWrite _ -> fsLit "atomicwrite"
@@ -2684,6 +2810,7 @@ outOfLineCmmOp mop res args
MO_Add2 {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
@@ -2698,7 +2825,7 @@ outOfLineCmmOp mop res args
genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
genSwitch dflags expr targets
- | gopt Opt_PIC dflags
+ | positionIndependent dflags
= do
(reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset)
-- getNonClobberedReg because it needs to survive across t_code
@@ -2750,23 +2877,29 @@ genSwitch dflags expr targets
JMP_TBL op ids (Section ReadOnlyData lbl) lbl
]
return code
- where (offset, ids) = switchTargetsToTable targets
+ where
+ (offset, blockIds) = switchTargetsToTable targets
+ ids = map (fmap DestBlockId) blockIds
generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
- = Just (createJumpTable dflags ids section lbl)
+ = let getBlockId (DestBlockId id) = id
+ getBlockId _ = panic "Non-Label target in Jump Table"
+ blockIds = map (fmap getBlockId) ids
+ in Just (createJumpTable dflags blockIds section lbl)
generateJumpTableForInstr _ _ = Nothing
createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
-> GenCmmDecl (Alignment, CmmStatics) h g
createJumpTable dflags ids section lbl
= let jumpTable
- | gopt Opt_PIC dflags =
- let jumpTableEntryRel Nothing
- = CmmStaticLit (CmmInt 0 (wordWidth dflags))
+ | positionIndependent dflags =
+ let ww = wordWidth dflags
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 ww)
jumpTableEntryRel (Just blockid)
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
- where blockLabel = mkAsmTempLabel (getUnique blockid)
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww)
+ where blockLabel = blockLbl blockid
in map jumpTableEntryRel ids
| otherwise = map (jumpTableEntry dflags) ids
in CmmData section (1, Statics lbl jumpTable)
@@ -2797,6 +2930,59 @@ condIntReg cond x y = do
return (Any II32 code)
+-----------------------------------------------------------
+--- Note [SSE Parity Checks] ---
+-----------------------------------------------------------
+
+-- We have to worry about unordered operands (eg. comparisons
+-- against NaN). If the operands are unordered, the comparison
+-- sets the parity flag, carry flag and zero flag.
+-- All comparisons are supposed to return false for unordered
+-- operands except for !=, which returns true.
+--
+-- Optimisation: we don't have to test the parity flag if we
+-- know the test has already excluded the unordered case: eg >
+-- and >= test for a zero carry flag, which can only occur for
+-- ordered operands.
+--
+-- By reversing comparisons we can avoid testing the parity
+-- for < and <= as well. If any of the arguments is an NaN we
+-- return false either way. If both arguments are valid then
+-- x <= y <-> y >= x holds. So it's safe to swap these.
+--
+-- We invert the condition inside getRegister'and getCondCode
+-- which should cover all invertable cases.
+-- All other functions translating FP comparisons to assembly
+-- use these to two generate the comparison code.
+--
+-- As an example consider a simple check:
+--
+-- func :: Float -> Float -> Int
+-- func x y = if x < y then 1 else 0
+--
+-- Which in Cmm gives the floating point comparison.
+--
+-- if (%MO_F_Lt_W32(F1, F2)) goto c2gg; else goto c2gf;
+--
+-- We used to compile this to an assembly code block like this:
+-- _c2gh:
+-- ucomiss %xmm2,%xmm1
+-- jp _c2gf
+-- jb _c2gg
+-- jmp _c2gf
+--
+-- Where we have to introduce an explicit
+-- check for unordered results (using jmp parity):
+--
+-- We can avoid this by exchanging the arguments and inverting the direction
+-- of the comparison. This results in the sequence of:
+--
+-- ucomiss %xmm1,%xmm2
+-- ja _c2g2
+-- jmp _c2g1
+--
+-- Removing the jump reduces the pressure on the branch predidiction system
+-- and plays better with the uOP cache.
condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
@@ -2815,27 +3001,18 @@ condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
CondCode _ cond cond_code <- condFltCode cond x y
tmp1 <- getNewRegNat (archWordFormat is32Bit)
tmp2 <- getNewRegNat (archWordFormat is32Bit)
- let
- -- We have to worry about unordered operands (eg. comparisons
- -- against NaN). If the operands are unordered, the comparison
- -- sets the parity flag, carry flag and zero flag.
- -- All comparisons are supposed to return false for unordered
- -- operands except for !=, which returns true.
- --
- -- Optimisation: we don't have to test the parity flag if we
- -- know the test has already excluded the unordered case: eg >
- -- and >= test for a zero carry flag, which can only occur for
- -- ordered operands.
- --
- -- ToDo: by reversing comparisons we could avoid testing the
- -- parity flag in more cases.
-
+ let -- See Note [SSE Parity Checks]
code dst =
cond_code `appOL`
(case cond of
NE -> or_unordered dst
GU -> plain_test dst
GEU -> plain_test dst
+ -- Use ASSERT so we don't break releases if these creep in.
+ LTT -> ASSERT2(False, ppr "Should have been turned into >")
+ and_ordered dst
+ LE -> ASSERT2(False, ppr "Should have been turned into >=")
+ and_ordered dst
_ -> and_ordered dst)
plain_test dst = toOL [
diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs
index 586dabd8f4..35cbf943e1 100644
--- a/compiler/nativeGen/X86/Cond.hs
+++ b/compiler/nativeGen/X86/Cond.hs
@@ -8,6 +8,8 @@ module X86.Cond (
where
+import GhcPrelude
+
data Cond
= ALWAYS -- What's really used? ToDo
| EQQ
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 71f50e9d2a..c7000c9f4b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -8,7 +8,7 @@
--
-----------------------------------------------------------------------------
-module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest,
+module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..),
getJumpDestBlockId, canShortcut, shortcutStatics,
shortcutJump, i386_insert_ffrees, allocMoreStack,
maxSpillSlots, archWordFormat)
@@ -17,6 +17,8 @@ where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
+import GhcPrelude
+
import X86.Cond
import X86.Regs
import Instruction
@@ -320,7 +322,7 @@ data Instr
| JXX_GBL Cond Imm -- non-local version of JXX
-- Table jump
| JMP_TBL Operand -- Address to jump to
- [Maybe BlockId] -- Blocks in the jump table
+ [Maybe JumpDest] -- Targets of the jump table
Section -- Data section jump table should be put in
CLabel -- Label of jump table
| CALL (Either Imm Reg) [Reg]
@@ -343,6 +345,10 @@ data Instr
| BSF Format Operand Reg -- bit scan forward
| BSR Format Operand Reg -- bit scan reverse
+ -- bit manipulation instructions
+ | PDEP Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask
+ | PEXT Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
+
-- prefetch
| PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
-- variant can be NTA, Lvl0, Lvl1, or Lvl2
@@ -462,6 +468,9 @@ x86_regUsageOfInstr platform instr
BSF _ src dst -> mkRU (use_R src []) [dst]
BSR _ src dst -> mkRU (use_R src []) [dst]
+ PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+ PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+
-- note: might be a better way to do this
PREFETCH _ _ src -> mkRU (use_R src []) []
LOCK i -> x86_regUsageOfInstr platform i
@@ -638,6 +647,8 @@ x86_patchRegsOfInstr instr env
CLTD _ -> instr
POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
+ PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst)
+ PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst)
BSF fmt src dst -> BSF fmt (patchOp src) (env dst)
BSR fmt src dst -> BSR fmt (patchOp src) (env dst)
@@ -693,7 +704,7 @@ x86_jumpDestsOfInstr
x86_jumpDestsOfInstr insn
= case insn of
JXX _ id -> [id]
- JMP_TBL _ ids _ _ -> [id | Just id <- ids]
+ JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
_ -> []
@@ -704,8 +715,12 @@ x86_patchJumpInstr insn patchF
= case insn of
JXX cc id -> JXX cc (patchF id)
JMP_TBL op ids section lbl
- -> JMP_TBL op (map (fmap patchF) ids) section lbl
+ -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
_ -> insn
+ where
+ patchJumpDest f (DestBlockId id) = DestBlockId (f id)
+ patchJumpDest _ dest = dest
+
@@ -843,25 +858,104 @@ x86_mkJumpInstr
x86_mkJumpInstr id
= [JXX ALWAYS id]
+-- Note [Windows stack layout]
+-- | On most OSes the kernel will place a guard page after the current stack
+-- page. If you allocate larger than a page worth you may jump over this
+-- guard page. Not only is this a security issue, but on certain OSes such
+-- as Windows a new page won't be allocated if you don't hit the guard. This
+-- will cause a segfault or access fault.
+--
+-- This function defines if the current allocation amount requires a probe.
+-- On Windows (for now) we emit a call to _chkstk for this. For other OSes
+-- this is not yet implemented.
+-- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk
+-- The Windows stack looks like this:
+--
+-- +-------------------+
+-- | SP |
+-- +-------------------+
+-- | |
+-- | GUARD PAGE |
+-- | |
+-- +-------------------+
+-- | |
+-- | |
+-- | UNMAPPED |
+-- | |
+-- | |
+-- +-------------------+
+--
+-- In essense each allocation larger than a page size needs to be chunked and
+-- a probe emitted after each page allocation. You have to hit the guard
+-- page so the kernel can map in the next page, otherwise you'll segfault.
+--
+needs_probe_call :: Platform -> Int -> Bool
+needs_probe_call platform amount
+ = case platformOS platform of
+ OSMinGW32 -> case platformArch platform of
+ ArchX86 -> amount > (4 * 1024)
+ ArchX86_64 -> amount > (8 * 1024)
+ _ -> False
+ _ -> False
x86_mkStackAllocInstr
:: Platform
-> Int
- -> Instr
+ -> [Instr]
x86_mkStackAllocInstr platform amount
- = case platformArch platform of
- ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
- ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
- _ -> panic "x86_mkStackAllocInstr"
+ = case platformOS platform of
+ OSMinGW32 ->
+ -- These will clobber AX but this should be ok because
+ --
+ -- 1. It is the first thing we do when entering the closure and AX is
+ -- a caller saved registers on Windows both on x86_64 and x86.
+ --
+ -- 2. The closures are only entered via a call or longjmp in which case
+ -- there are no expectations for volatile registers.
+ --
+ -- 3. When the target is a local branch point it is re-targeted
+ -- after the dealloc, preserving #2. See note [extra spill slots].
+ --
+ -- We emit a call because the stack probes are quite involved and
+ -- would bloat code size a lot. GHC doesn't really have an -Os.
+ -- __chkstk is guaranteed to leave all nonvolatile registers and AX
+ -- untouched. It's part of the standard prologue code for any Windows
+ -- function dropping the stack more than a page.
+ -- See Note [Windows stack layout]
+ case platformArch platform of
+ ArchX86 | needs_probe_call platform amount ->
+ [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax)
+ , CALL (Left $ strImmLit "___chkstk_ms") [eax]
+ , SUB II32 (OpReg eax) (OpReg esp)
+ ]
+ | otherwise ->
+ [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
+ , TEST II32 (OpReg esp) (OpReg esp)
+ ]
+ ArchX86_64 | needs_probe_call platform amount ->
+ [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
+ , CALL (Left $ strImmLit "__chkstk_ms") [rax]
+ , SUB II64 (OpReg rax) (OpReg rsp)
+ ]
+ | otherwise ->
+ [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ , TEST II64 (OpReg rsp) (OpReg rsp)
+ ]
+ _ -> panic "x86_mkStackAllocInstr"
+ _ ->
+ case platformArch platform of
+ ArchX86 -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ]
+ ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ]
+ _ -> panic "x86_mkStackAllocInstr"
x86_mkStackDeallocInstr
:: Platform
-> Int
- -> Instr
+ -> [Instr]
x86_mkStackDeallocInstr platform amount
= case platformArch platform of
- ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
- ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ ArchX86 -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)]
+ ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)]
_ -> panic "x86_mkStackDeallocInstr"
i386_insert_ffrees
@@ -981,7 +1075,7 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
insert_stack_insns (BasicBlock id insns)
| Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id [alloc, JXX ALWAYS new_blockid]
+ = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid]
, BasicBlock new_blockid block' ]
| otherwise
= [ BasicBlock id block' ]
@@ -989,7 +1083,7 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
block' = foldr insert_dealloc [] insns
insert_dealloc insn r = case insn of
- JMP _ _ -> dealloc : insn : r
+ JMP _ _ -> dealloc ++ (insn : r)
JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
_other -> x86_patchJumpInstr insn retarget : r
where retarget b = fromMaybe b (mapLookup b new_blockmap)
@@ -998,7 +1092,6 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
-- in
return (CmmProc info lbl live (ListGraph new_code))
-
data JumpDest = DestBlockId BlockId | DestImm Imm
getJumpDestBlockId :: JumpDest -> Maybe BlockId
@@ -1015,14 +1108,24 @@ canShortcut _ = Nothing
-- The blockset helps avoid following cycles.
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn
- where shortcutJump' fn seen insn@(JXX cc id) =
- if setMember id seen then insn
- else case fn id of
- Nothing -> insn
- Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
- Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm)
- where seen' = setInsert id seen
- shortcutJump' _ _ other = other
+ where
+ shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr
+ shortcutJump' fn seen insn@(JXX cc id) =
+ if setMember id seen then insn
+ else case fn id of
+ Nothing -> insn
+ Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
+ Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm)
+ where seen' = setInsert id seen
+ shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
+ let updateBlock (Just (DestBlockId bid)) =
+ case fn bid of
+ Nothing -> Just (DestBlockId bid )
+ Just dest -> Just dest
+ updateBlock dest = dest
+ blocks' = map updateBlock blocks
+ in JMP_TBL addr blocks' section tblId
+ shortcutJump' _ _ other = other
-- Here because it knows about JumpDest
shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
@@ -1033,14 +1136,14 @@ shortcutStatics fn (align, Statics lbl statics)
shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
- | Just uq <- maybeAsmTemp lab = shortBlockId fn emptyUniqSet (mkBlockId uq)
- | otherwise = lab
+ | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId
+ | otherwise = lab
shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn (CmmStaticLit (CmmLabel lab))
= CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
- = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
+ = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
-- slightly dodgy, we're ignoring the second label, but this
-- works with the way we use CmmLabelDiffOff for jump tables now.
shortcutStatic _ other_static
@@ -1054,8 +1157,8 @@ shortBlockId
shortBlockId fn seen blockid =
case (elementOfUniqSet uq seen, fn blockid) of
- (True, _) -> mkAsmTempLabel uq
- (_, Nothing) -> mkAsmTempLabel uq
+ (True, _) -> blockLbl blockid
+ (_, Nothing) -> blockLbl blockid
(_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
(_, Just (DestImm (ImmCLbl lbl))) -> lbl
(_, _other) -> panic "shortBlockId"
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index fce432a3dc..03d4fce794 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -23,6 +23,8 @@ where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
+import GhcPrelude
+
import X86.Regs
import X86.Instr
import X86.Cond
@@ -37,8 +39,9 @@ import Hoopl.Label
import BasicTypes (Alignment)
import DynFlags
import Cmm hiding (topInfoTable)
+import BlockId
import CLabel
-import Unique ( pprUniqueAlways, Uniquable(..) )
+import Unique ( pprUniqueAlways )
import Platform
import FastString
import Outputable
@@ -70,12 +73,17 @@ import Data.Bits
-- .subsections_via_symbols and -dead_strip can be found at
-- <https://developer.apple.com/library/mac/documentation/DeveloperTools/Reference/Assembler/040-Assembler_Directives/asm_directives.html#//apple_ref/doc/uid/TP30000823-TPXREF101>
+pprProcAlignment :: SDoc
+pprProcAlignment = sdocWithDynFlags $ \dflags ->
+ (maybe empty pprAlign . cmmProcAlignment $ dflags)
+
pprNatCmmDecl :: NatCmmDecl (Alignment, CmmStatics) Instr -> SDoc
pprNatCmmDecl (CmmData section dats) =
pprSectionAlign section $$ pprDatas dats
pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
sdocWithDynFlags $ \dflags ->
+ pprProcAlignment $$
case topInfoTable proc of
Nothing ->
case blocks of
@@ -83,6 +91,7 @@ pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
pprLabel lbl
blocks -> -- special case for code without info table:
pprSectionAlign (Section Text lbl) $$
+ pprProcAlignment $$
pprLabel lbl $$ -- blocks guaranteed not null, so label needed
vcat (map (pprBasicBlock top_info) blocks) $$
(if debugLevel dflags > 0
@@ -92,6 +101,7 @@ pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
Just (Statics info_lbl _) ->
sdocWithPlatform $ \platform ->
pprSectionAlign (Section Text info_lbl) $$
+ pprProcAlignment $$
(if platformHasSubsectionsViaSymbols platform
then ppr (mkDeadStripPreventer info_lbl) <> char ':'
else empty) $$
@@ -126,7 +136,7 @@ pprBasicBlock info_env (BasicBlock blockid instrs)
(if debugLevel dflags > 0
then ppr (mkAsmTempEndLabel asmLbl) <> char ':' else empty)
where
- asmLbl = mkAsmTempLabel (getUnique blockid)
+ asmLbl = blockLbl blockid
maybe_infotable = case mapLookup blockid info_env of
Nothing -> empty
Just (Statics info_lbl info) ->
@@ -160,35 +170,116 @@ pprGloblDecl lbl
| not (externallyVisibleCLabel lbl) = empty
| otherwise = text ".globl " <> ppr lbl
-pprTypeAndSizeDecl :: CLabel -> SDoc
-pprTypeAndSizeDecl lbl
+pprLabelType' :: DynFlags -> CLabel -> SDoc
+pprLabelType' dflags lbl =
+ if isCFunctionLabel lbl || functionOkInfoTable then
+ text "@function"
+ else
+ text "@object"
+ where
+ {-
+ NOTE: This is a bit hacky.
+
+ With the `tablesNextToCode` info tables look like this:
+ ```
+ <info table data>
+ label_info:
+ <info table code>
+ ```
+ So actually info table label points exactly to the code and we can mark
+ the label as @function. (This is required to make perf and potentially other
+ tools to work on Haskell binaries).
+ This usually works well but it can cause issues with a linker.
+ A linker uses different algorithms for the relocation depending on
+ the symbol type.For some reason, a linker will generate JUMP_SLOT relocation
+ when constructor info table is referenced from a data section.
+ This only happens with static constructor call so
+ we mark _con_info symbols as `@object` to avoid the issue with relocations.
+
+ @SimonMarlow hack explanation:
+ "The reasoning goes like this:
+
+ * The danger when we mark a symbol as `@function` is that the linker will
+ redirect it to point to the PLT and use a `JUMP_SLOT` relocation when
+ the symbol refers to something outside the current shared object.
+ A PLT / JUMP_SLOT reference only works for symbols that we jump to, not
+ for symbols representing data,, nor for info table symbol references which
+ we expect to point directly to the info table.
+ * GHC generates code that might refer to any info table symbol from the text
+ segment, but that's OK, because those will be explicit GOT references
+ generated by the code generator.
+ * When we refer to info tables from the data segment, it's either
+ * a FUN_STATIC/THUNK_STATIC local to this module
+ * a `con_info` that could be from anywhere
+
+ So, the only info table symbols that we might refer to from the data segment
+ of another shared object are `con_info` symbols, so those are the ones we
+ need to exclude from getting the @function treatment.
+ "
+
+ A good place to check for more
+ https://ghc.haskell.org/trac/ghc/wiki/Commentary/PositionIndependentCode
+
+ Another possible hack is to create an extra local function symbol for
+ every code-like thing to give the needed information for to the tools
+ but mess up with the relocation. https://phabricator.haskell.org/D4730
+ -}
+ functionOkInfoTable = tablesNextToCode dflags &&
+ isInfoTableLabel lbl && not (isConInfoTableLabel lbl)
+
+
+pprTypeDecl :: CLabel -> SDoc
+pprTypeDecl lbl
= sdocWithPlatform $ \platform ->
if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl
- then text ".type " <> ppr lbl <> ptext (sLit ", @object")
+ then
+ sdocWithDynFlags $ \df ->
+ text ".type " <> ppr lbl <> ptext (sLit ", ") <> pprLabelType' df lbl
else empty
pprLabel :: CLabel -> SDoc
pprLabel lbl = pprGloblDecl lbl
- $$ pprTypeAndSizeDecl lbl
+ $$ pprTypeDecl lbl
$$ (ppr lbl <> char ':')
+{-
+Note [Pretty print ASCII when AsmCodeGen]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Previously, when generating assembly code, we created SDoc with
+`(ptext . sLit)` for every bytes in literal bytestring, then
+combine them using `hcat`.
+
+When handling literal bytestrings with millions of bytes,
+millions of SDoc would be created and to combine, leading to
+high memory usage.
+
+Now we escape the given bytestring to string directly and construct
+SDoc only once. This improvement could dramatically decrease the
+memory allocation from 4.7GB to 1.3GB when embedding a 3MB literal
+string in source code. See Trac #14741 for profiling results.
+-}
pprASCII :: [Word8] -> SDoc
pprASCII str
- = hcat (map (do1 . fromIntegral) str)
+ -- Transform this given literal bytestring to escaped string and construct
+ -- the literal SDoc directly.
+ -- See Trac #14741
+ -- and Note [Pretty print ASCII when AsmCodeGen]
+ = ptext $ sLit $ foldr (\w s -> (do1 . fromIntegral) w ++ s) "" str
where
- do1 :: Int -> SDoc
- do1 w | '\t' <- chr w = ptext (sLit "\\t")
- do1 w | '\n' <- chr w = ptext (sLit "\\n")
- do1 w | '"' <- chr w = ptext (sLit "\\\"")
- do1 w | '\\' <- chr w = ptext (sLit "\\\\")
- do1 w | isPrint (chr w) = char (chr w)
- do1 w | otherwise = char '\\' <> octal w
-
- octal :: Int -> SDoc
- octal w = int ((w `div` 64) `mod` 8)
- <> int ((w `div` 8) `mod` 8)
- <> int (w `mod` 8)
+ do1 :: Int -> String
+ do1 w | '\t' <- chr w = "\\t"
+ | '\n' <- chr w = "\\n"
+ | '"' <- chr w = "\\\""
+ | '\\' <- chr w = "\\\\"
+ | isPrint (chr w) = [chr w]
+ | otherwise = '\\' : octal w
+
+ octal :: Int -> String
+ octal w = [ chr (ord '0' + (w `div` 64) `mod` 8)
+ , chr (ord '0' + (w `div` 8) `mod` 8)
+ , chr (ord '0' + w `mod` 8)
+ ]
pprAlign :: Int -> SDoc
pprAlign bytes
@@ -505,7 +596,7 @@ pprDataItem' dflags lit
--
case lit of
-- A relative relocation:
- CmmLabelDiffOff _ _ _ ->
+ CmmLabelDiffOff _ _ _ _ ->
[text "\t.long\t" <> pprImm imm,
text "\t.long\t0"]
_ ->
@@ -516,7 +607,7 @@ pprDataItem' dflags lit
asmComment :: SDoc -> SDoc
-asmComment c = ifPprDebug $ text "# " <> c
+asmComment c = whenPprDebug $ text "# " <> c
pprInstr :: Instr -> SDoc
@@ -645,6 +736,9 @@ pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst
pprInstr (BSF format src dst) = pprOpOp (sLit "bsf") format src (OpReg dst)
pprInstr (BSR format src dst) = pprOpOp (sLit "bsr") format src (OpReg dst)
+pprInstr (PDEP format src mask dst) = pprFormatOpOpReg (sLit "pdep") format src mask dst
+pprInstr (PEXT format src mask dst) = pprFormatOpOpReg (sLit "pext") format src mask dst
+
pprInstr (PREFETCH NTA format src ) = pprFormatOp_ (sLit "prefetchnta") format src
pprInstr (PREFETCH Lvl0 format src) = pprFormatOp_ (sLit "prefetcht0") format src
pprInstr (PREFETCH Lvl1 format src) = pprFormatOp_ (sLit "prefetcht1") format src
@@ -702,7 +796,7 @@ pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
pprInstr (JXX cond blockid)
= pprCondInstr (sLit "j") cond (ppr lab)
- where lab = mkAsmTempLabel (getUnique blockid)
+ where lab = blockLbl blockid
pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
@@ -1259,6 +1353,16 @@ pprFormatRegRegReg name format reg1 reg2 reg3
pprReg format reg3
]
+pprFormatOpOpReg :: LitString -> Format -> Operand -> Operand -> Reg -> SDoc
+pprFormatOpOpReg name format op1 op2 reg3
+ = hcat [
+ pprMnemonic name format,
+ pprOperand format op1,
+ comma,
+ pprOperand format op2,
+ comma,
+ pprReg format reg3
+ ]
pprFormatAddrReg :: LitString -> Format -> AddrMode -> Reg -> SDoc
pprFormatAddrReg name format op dst
@@ -1302,4 +1406,3 @@ pprFormatOpOpCoerce name format1 format2 op1 op2
pprCondInstr :: LitString -> Cond -> SDoc -> SDoc
pprCondInstr name cond arg
= hcat [ char '\t', ptext name, pprCond cond, space, arg]
-
diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs
index 4dfe0350d4..226441b16f 100644
--- a/compiler/nativeGen/X86/RegInfo.hs
+++ b/compiler/nativeGen/X86/RegInfo.hs
@@ -9,6 +9,8 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
+import GhcPrelude
+
import Format
import Reg
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 4cb82ea224..97c3b984e2 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -48,6 +48,8 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
+import GhcPrelude
+
import CodeGen.Platform
import Reg
import RegClass
@@ -58,8 +60,10 @@ import DynFlags
import Outputable
import Platform
+import qualified Data.Array as A
+
-- | regSqueeze_class reg
--- Calculuate the maximum number of register colors that could be
+-- Calculate the maximum number of register colors that could be
-- denied to a node of this class due to having this reg
-- as a neighbour.
--
@@ -142,7 +146,7 @@ litToImm (CmmFloat f W32) = ImmFloat f
litToImm (CmmFloat f W64) = ImmDouble f
litToImm (CmmLabel l) = ImmCLbl l
litToImm (CmmLabelOff l off) = ImmIndex l off
-litToImm (CmmLabelDiffOff l1 l2 off)
+litToImm (CmmLabelDiffOff l1 l2 off _)
= ImmConstantSum
(ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
(ImmInt off)
@@ -234,7 +238,6 @@ xmmregnos platform = [firstxmm .. lastxmm platform]
floatregnos :: Platform -> [RegNo]
floatregnos platform = fakeregnos ++ xmmregnos platform
-
-- argRegs is the set of regs which are read for an n-argument call to C.
-- For archs which pass all args on the stack (x86), is empty.
-- Sparc passes up to the first 6 args in regs.
@@ -267,13 +270,13 @@ showReg platform n
| n >= firstxmm = "%xmm" ++ show (n-firstxmm)
| n >= firstfake = "%fake" ++ show (n-firstfake)
| n >= 8 = "%r" ++ show n
- | otherwise = regNames platform !! n
+ | otherwise = regNames platform A.! n
-regNames :: Platform -> [String]
+regNames :: Platform -> A.Array Int String
regNames platform
= if target32Bit platform
- then ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp"]
- else ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp"]
+ then A.listArray (0,8) ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp"]
+ else A.listArray (0,8) ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp"]
@@ -404,7 +407,10 @@ callClobberedRegs platform
| target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform)
| platformOS platform == OSMinGW32
= [rax,rcx,rdx,r8,r9,r10,r11]
- ++ map regSingle (floatregnos platform)
+ -- Only xmm0-5 are caller-saves registers on 64bit windows.
+ -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage )
+ -- For details check the Win64 ABI.
+ ++ map regSingle fakeregnos ++ map xmm [0 .. 5]
| otherwise
-- all xmm regs are caller-saves
-- caller-saves registers