diff options
author | Reid Barton <rwbarton@gmail.com> | 2015-06-25 13:53:57 -0400 |
---|---|---|
committer | Austin Seipp <austin@well-typed.com> | 2015-06-26 14:24:17 -0500 |
commit | 7707e54c0e3e3dc7bd7b0f44a9567770340ace31 (patch) | |
tree | 85c42dd297762bf5a568e734629fc8a8d985e86a | |
parent | 18e0e95fc492a85fac275f600bfd4934c5de45b5 (diff) | |
download | haskell-7707e54c0e3e3dc7bd7b0f44a9567770340ace31.tar.gz |
Be aware of overlapping global STG registers in CmmSink (#10521)
Summary:
On x86_64, commit e2f6bbd3a27685bc667655fdb093734cb565b4cf assigned
the STG registers F1 and D1 the same hardware register (xmm1), and
the same for the registers F2 and D2, etc. When mixing calls to
functions involving Float#s and Double#s, this can cause wrong Cmm
optimizations that assume the F1 and D1 registers are independent.
Reviewers: simonpj, austin
Reviewed By: austin
Subscribers: simonpj, thomie, bgamari
Differential Revision: https://phabricator.haskell.org/D993
GHC Trac Issues: #10521
(cherry picked from commit a2f828a370b220839ad9b31a274c0198ef91b7fe)
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 37 | ||||
-rw-r--r-- | compiler/cmm/CmmSink.hs | 15 | ||||
-rw-r--r-- | compiler/cmm/CmmUtils.hs | 36 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 16 | ||||
-rw-r--r-- | includes/stg/MachRegs.h | 6 | ||||
-rw-r--r-- | testsuite/.gitignore | 2 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T10521.hs | 11 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T10521.stdout | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T10521b.hs | 18 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T10521b.stdout | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/all.T | 2 |
11 files changed, 118 insertions, 27 deletions
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 1d6c97f41e..3d21ebce2e 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -20,7 +20,6 @@ module CmmExpr , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet , regSetToList - , regUsedIn , Area(..) , module CmmMachOp @@ -373,17 +372,6 @@ instance DefinerOfRegs r a => DefinerOfRegs r (Maybe a) where foldRegsDefd dflags f set (Just x) = foldRegsDefd dflags f set x ----------------------------------------------------------------------------- --- Another reg utility - -regUsedIn :: CmmReg -> CmmExpr -> Bool -_ `regUsedIn` CmmLit _ = False -reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e -reg `regUsedIn` CmmReg reg' = reg == reg' -reg `regUsedIn` CmmRegOff reg' _ = reg == reg' -reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es -_ `regUsedIn` CmmStackSlot _ _ = False - ------------------------------------------------------------------------------ -- Global STG registers ----------------------------------------------------------------------------- @@ -397,6 +385,31 @@ vgcFlag :: CmmType -> VGcPtr vgcFlag ty | isGcPtrType ty = VGcPtr | otherwise = VNonGcPtr +{- +Note [Overlapping global registers] + +The backend might not faithfully implement the abstraction of the STG +machine with independent registers for different values of type +GlobalReg. Specifically, certain pairs of registers (r1, r2) may +overlap in the sense that a store to r1 invalidates the value in r2, +and vice versa. + +Currently this occurs only on the x86_64 architecture where FloatReg n +and DoubleReg n are assigned the same microarchitectural register, in +order to allow functions to receive more Float# or Double# arguments +in registers (as opposed to on the stack). + +There are no specific rules about which registers might overlap with +which other registers, but presumably it's safe to assume that nothing +will overlap with special registers like Sp or BaseReg. + +Use CmmUtils.regsOverlap to determine whether two GlobalRegs overlap +on a particular platform. The instance Eq GlobalReg is syntactic +equality of STG registers and does not take overlap into +account. However it is still used in UserOfRegs/DefinerOfRegs and +there are likely still bugs there, beware! +-} + data GlobalReg -- Argument and return registers = VanillaReg -- pointers, unboxed ints and chars diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs index 22f4d2ec92..7279013e60 100644 --- a/compiler/cmm/CmmSink.hs +++ b/compiler/cmm/CmmSink.hs @@ -510,11 +510,8 @@ okToInline _ _ _ = True -- ----------------------------------------------------------------------------- --- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment --- @r = e@ can be safely commuted past @stmt@. --- --- We only sink "r = G" assignments right now, so conflicts is very simple: --- +-- | @conflicts (r,e) node@ is @False@ if and only if the assignment +-- @r = e@ can be safely commuted past statement @node@. conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool conflicts dflags (r, rhs, addr) node @@ -548,13 +545,15 @@ conflicts dflags (r, rhs, addr) node -- Cmm expression globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool globalRegistersConflict dflags expr node = - foldRegsDefd dflags (\b r -> b || (CmmGlobal r) `regUsedIn` expr) False node + foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr) + False node -- Returns True if node defines any local registers that are used in the -- Cmm expression localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool localRegistersConflict dflags expr node = - foldRegsDefd dflags (\b r -> b || (CmmLocal r) `regUsedIn` expr) False node + foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr) + False node -- Note [Sinking and calls] -- ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -693,7 +692,7 @@ loadAddr dflags e w = case e of CmmReg r -> regAddr dflags r 0 w CmmRegOff r i -> regAddr dflags r i w - _other | CmmGlobal Sp `regUsedIn` e -> StackMem + _other | regUsedIn dflags (CmmGlobal Sp) e -> StackMem | otherwise -> AnyMem regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 65d633e6b7..8e7a2dc448 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -42,6 +42,9 @@ module CmmUtils( cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, cmmConstrTag1, + -- Overlap and usage + regsOverlap, regUsedIn, + -- Liveness and bitmaps mkLiveness, @@ -75,6 +78,7 @@ import Unique import UniqSupply import DynFlags import Util +import CodeGen.Platform import Data.Word import Data.Maybe @@ -394,6 +398,38 @@ cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags) +----------------------------------------------------------------------------- +-- Overlap and usage + +-- | Returns True if the two STG registers overlap on the specified +-- platform, in the sense that writing to one will clobber the +-- other. This includes the case that the two registers are the same +-- STG register. See Note [Overlapping global registers] for details. +regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool +regsOverlap dflags (CmmGlobal g) (CmmGlobal g') + | Just real <- globalRegMaybe (targetPlatform dflags) g, + Just real' <- globalRegMaybe (targetPlatform dflags) g', + real == real' + = True +regsOverlap _ reg reg' = reg == reg' + +-- | Returns True if the STG register is used by the expression, in +-- the sense that a store to the register might affect the value of +-- the expression. +-- +-- We must check for overlapping registers and not just equal +-- registers here, otherwise CmmSink may incorrectly reorder +-- assignments that conflict due to overlap. See Trac #10521 and Note +-- [Overlapping global registers]. +regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool +regUsedIn dflags = regUsedIn_ where + _ `regUsedIn_` CmmLit _ = False + reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e + reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es + _ `regUsedIn_` CmmStackSlot _ _ = False + -------------------------------------------- -- -- mkLiveness diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 5e8944df4a..8d8c8a034d 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -401,11 +401,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e emitMultiAssign [] [] = return () emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs -emitMultiAssign regs rhss = ASSERT( equalLength regs rhss ) - unscramble ([1..] `zip` (regs `zip` rhss)) +emitMultiAssign regs rhss = do + dflags <- getDynFlags + ASSERT( equalLength regs rhss ) + unscramble dflags ([1..] `zip` (regs `zip` rhss)) -unscramble :: [Vrtx] -> FCode () -unscramble vertices = mapM_ do_component components +unscramble :: DynFlags -> [Vrtx] -> FCode () +unscramble dflags vertices = mapM_ do_component components where edges :: [ (Vrtx, Key, [Key]) ] edges = [ (vertex, key1, edges_from stmt1) @@ -432,7 +434,7 @@ unscramble vertices = mapM_ do_component components u <- newUnique let (to_tmp, from_tmp) = split dflags u first_stmt mk_graph to_tmp - unscramble rest + unscramble dflags rest mk_graph from_tmp split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt) @@ -445,8 +447,8 @@ unscramble vertices = mapM_ do_component components mk_graph :: Stmt -> FCode () mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs -mustFollow :: Stmt -> Stmt -> Bool -(reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs + mustFollow :: Stmt -> Stmt -> Bool + (reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs ------------------------------------------------------------------------- -- mkSwitch diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index 3eeb697604..b7090275ab 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -170,6 +170,12 @@ #define REG_R6 r9 #define REG_SpLim r15 +/* +Map both Fn and Dn to register xmmn so that we can pass a function any +combination of up to six Float# or Double# arguments without touching +the stack. See Note [Overlapping global registers] for implications. +*/ + #define REG_F1 xmm1 #define REG_F2 xmm2 #define REG_F3 xmm3 diff --git a/testsuite/.gitignore b/testsuite/.gitignore index 2623c60f46..b1ed88791c 100644 --- a/testsuite/.gitignore +++ b/testsuite/.gitignore @@ -165,6 +165,8 @@ mk/ghcconfig*_inplace_bin_ghc-stage2.exe.mk /tests/codeGen/should_run/SizeOfSmallArray /tests/codeGen/should_run/StaticArraySize /tests/codeGen/should_run/StaticByteArraySize +/tests/codeGen/should_run/T10521 +/tests/codeGen/should_run/T10521b /tests/codeGen/should_run/T1852 /tests/codeGen/should_run/T1861 /tests/codeGen/should_run/T2080 diff --git a/testsuite/tests/codeGen/should_run/T10521.hs b/testsuite/tests/codeGen/should_run/T10521.hs new file mode 100644 index 0000000000..e770ba315c --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T10521.hs @@ -0,0 +1,11 @@ +import Data.Word( Word8 ) + +toV :: Float -> Word8 +toV d = let coeff = significand d * 255.9999 / d + a = truncate $ d * coeff + b = exponent d + in a `seq` (b `seq` a) + +main :: IO () +main = + print $ map toV [ 3.56158e-2, 0.7415215, 0.5383201, 0.1289829, 0.45520145 ] diff --git a/testsuite/tests/codeGen/should_run/T10521.stdout b/testsuite/tests/codeGen/should_run/T10521.stdout new file mode 100644 index 0000000000..9843a1725d --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T10521.stdout @@ -0,0 +1 @@ +[145,189,137,132,233] diff --git a/testsuite/tests/codeGen/should_run/T10521b.hs b/testsuite/tests/codeGen/should_run/T10521b.hs new file mode 100644 index 0000000000..d0433f9b76 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T10521b.hs @@ -0,0 +1,18 @@ +{-# LANGUAGE MagicHash #-} + +import GHC.Exts + +f :: Float# -> Float# +f x = x +{-# NOINLINE f #-} + +g :: Double# -> Double# +g x = x +{-# NOINLINE g #-} + +h :: Float -> Float +h (F# x) = let a = F# (f x) + b = D# (g (2.0##)) + in a `seq` (b `seq` a) + +main = print (h 1.0) diff --git a/testsuite/tests/codeGen/should_run/T10521b.stdout b/testsuite/tests/codeGen/should_run/T10521b.stdout new file mode 100644 index 0000000000..d3827e75a5 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T10521b.stdout @@ -0,0 +1 @@ +1.0 diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 12418f0aad..b2970a2eba 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -129,3 +129,5 @@ test('T9013', omit_ways(['ghci']), # ghci doesn't support unboxed tuples compile_and_run, ['']) test('T9340', normal, compile_and_run, ['']) test('cgrun074', normal, compile_and_run, ['']) +test('T10521', normal, compile_and_run, ['']) +test('T10521b', normal, compile_and_run, ['']) |