summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorReid Barton <rwbarton@gmail.com>2015-06-25 13:53:57 -0400
committerAustin Seipp <austin@well-typed.com>2015-06-26 14:24:17 -0500
commit7707e54c0e3e3dc7bd7b0f44a9567770340ace31 (patch)
tree85c42dd297762bf5a568e734629fc8a8d985e86a
parent18e0e95fc492a85fac275f600bfd4934c5de45b5 (diff)
downloadhaskell-7707e54c0e3e3dc7bd7b0f44a9567770340ace31.tar.gz
Be aware of overlapping global STG registers in CmmSink (#10521)
Summary: On x86_64, commit e2f6bbd3a27685bc667655fdb093734cb565b4cf assigned the STG registers F1 and D1 the same hardware register (xmm1), and the same for the registers F2 and D2, etc. When mixing calls to functions involving Float#s and Double#s, this can cause wrong Cmm optimizations that assume the F1 and D1 registers are independent. Reviewers: simonpj, austin Reviewed By: austin Subscribers: simonpj, thomie, bgamari Differential Revision: https://phabricator.haskell.org/D993 GHC Trac Issues: #10521 (cherry picked from commit a2f828a370b220839ad9b31a274c0198ef91b7fe)
-rw-r--r--compiler/cmm/CmmExpr.hs37
-rw-r--r--compiler/cmm/CmmSink.hs15
-rw-r--r--compiler/cmm/CmmUtils.hs36
-rw-r--r--compiler/codeGen/StgCmmUtils.hs16
-rw-r--r--includes/stg/MachRegs.h6
-rw-r--r--testsuite/.gitignore2
-rw-r--r--testsuite/tests/codeGen/should_run/T10521.hs11
-rw-r--r--testsuite/tests/codeGen/should_run/T10521.stdout1
-rw-r--r--testsuite/tests/codeGen/should_run/T10521b.hs18
-rw-r--r--testsuite/tests/codeGen/should_run/T10521b.stdout1
-rw-r--r--testsuite/tests/codeGen/should_run/all.T2
11 files changed, 118 insertions, 27 deletions
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 1d6c97f41e..3d21ebce2e 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -20,7 +20,6 @@ module CmmExpr
, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
, plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet
, regSetToList
- , regUsedIn
, Area(..)
, module CmmMachOp
@@ -373,17 +372,6 @@ instance DefinerOfRegs r a => DefinerOfRegs r (Maybe a) where
foldRegsDefd dflags f set (Just x) = foldRegsDefd dflags f set x
-----------------------------------------------------------------------------
--- Another reg utility
-
-regUsedIn :: CmmReg -> CmmExpr -> Bool
-_ `regUsedIn` CmmLit _ = False
-reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e
-reg `regUsedIn` CmmReg reg' = reg == reg'
-reg `regUsedIn` CmmRegOff reg' _ = reg == reg'
-reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es
-_ `regUsedIn` CmmStackSlot _ _ = False
-
------------------------------------------------------------------------------
-- Global STG registers
-----------------------------------------------------------------------------
@@ -397,6 +385,31 @@ vgcFlag :: CmmType -> VGcPtr
vgcFlag ty | isGcPtrType ty = VGcPtr
| otherwise = VNonGcPtr
+{-
+Note [Overlapping global registers]
+
+The backend might not faithfully implement the abstraction of the STG
+machine with independent registers for different values of type
+GlobalReg. Specifically, certain pairs of registers (r1, r2) may
+overlap in the sense that a store to r1 invalidates the value in r2,
+and vice versa.
+
+Currently this occurs only on the x86_64 architecture where FloatReg n
+and DoubleReg n are assigned the same microarchitectural register, in
+order to allow functions to receive more Float# or Double# arguments
+in registers (as opposed to on the stack).
+
+There are no specific rules about which registers might overlap with
+which other registers, but presumably it's safe to assume that nothing
+will overlap with special registers like Sp or BaseReg.
+
+Use CmmUtils.regsOverlap to determine whether two GlobalRegs overlap
+on a particular platform. The instance Eq GlobalReg is syntactic
+equality of STG registers and does not take overlap into
+account. However it is still used in UserOfRegs/DefinerOfRegs and
+there are likely still bugs there, beware!
+-}
+
data GlobalReg
-- Argument and return registers
= VanillaReg -- pointers, unboxed ints and chars
diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs
index 22f4d2ec92..7279013e60 100644
--- a/compiler/cmm/CmmSink.hs
+++ b/compiler/cmm/CmmSink.hs
@@ -510,11 +510,8 @@ okToInline _ _ _ = True
-- -----------------------------------------------------------------------------
--- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
--- @r = e@ can be safely commuted past @stmt@.
---
--- We only sink "r = G" assignments right now, so conflicts is very simple:
---
+-- | @conflicts (r,e) node@ is @False@ if and only if the assignment
+-- @r = e@ can be safely commuted past statement @node@.
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node
@@ -548,13 +545,15 @@ conflicts dflags (r, rhs, addr) node
-- Cmm expression
globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict dflags expr node =
- foldRegsDefd dflags (\b r -> b || (CmmGlobal r) `regUsedIn` expr) False node
+ foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr)
+ False node
-- Returns True if node defines any local registers that are used in the
-- Cmm expression
localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict dflags expr node =
- foldRegsDefd dflags (\b r -> b || (CmmLocal r) `regUsedIn` expr) False node
+ foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr)
+ False node
-- Note [Sinking and calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -693,7 +692,7 @@ loadAddr dflags e w =
case e of
CmmReg r -> regAddr dflags r 0 w
CmmRegOff r i -> regAddr dflags r i w
- _other | CmmGlobal Sp `regUsedIn` e -> StackMem
+ _other | regUsedIn dflags (CmmGlobal Sp) e -> StackMem
| otherwise -> AnyMem
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs
index 65d633e6b7..8e7a2dc448 100644
--- a/compiler/cmm/CmmUtils.hs
+++ b/compiler/cmm/CmmUtils.hs
@@ -42,6 +42,9 @@ module CmmUtils(
cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged,
cmmConstrTag1,
+ -- Overlap and usage
+ regsOverlap, regUsedIn,
+
-- Liveness and bitmaps
mkLiveness,
@@ -75,6 +78,7 @@ import Unique
import UniqSupply
import DynFlags
import Util
+import CodeGen.Platform
import Data.Word
import Data.Maybe
@@ -394,6 +398,38 @@ cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr
cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags)
+-----------------------------------------------------------------------------
+-- Overlap and usage
+
+-- | Returns True if the two STG registers overlap on the specified
+-- platform, in the sense that writing to one will clobber the
+-- other. This includes the case that the two registers are the same
+-- STG register. See Note [Overlapping global registers] for details.
+regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool
+regsOverlap dflags (CmmGlobal g) (CmmGlobal g')
+ | Just real <- globalRegMaybe (targetPlatform dflags) g,
+ Just real' <- globalRegMaybe (targetPlatform dflags) g',
+ real == real'
+ = True
+regsOverlap _ reg reg' = reg == reg'
+
+-- | Returns True if the STG register is used by the expression, in
+-- the sense that a store to the register might affect the value of
+-- the expression.
+--
+-- We must check for overlapping registers and not just equal
+-- registers here, otherwise CmmSink may incorrectly reorder
+-- assignments that conflict due to overlap. See Trac #10521 and Note
+-- [Overlapping global registers].
+regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool
+regUsedIn dflags = regUsedIn_ where
+ _ `regUsedIn_` CmmLit _ = False
+ reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e
+ reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg'
+ reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg'
+ reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es
+ _ `regUsedIn_` CmmStackSlot _ _ = False
+
--------------------------------------------
--
-- mkLiveness
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index 5e8944df4a..8d8c8a034d 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -401,11 +401,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e
emitMultiAssign [] [] = return ()
emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs
-emitMultiAssign regs rhss = ASSERT( equalLength regs rhss )
- unscramble ([1..] `zip` (regs `zip` rhss))
+emitMultiAssign regs rhss = do
+ dflags <- getDynFlags
+ ASSERT( equalLength regs rhss )
+ unscramble dflags ([1..] `zip` (regs `zip` rhss))
-unscramble :: [Vrtx] -> FCode ()
-unscramble vertices = mapM_ do_component components
+unscramble :: DynFlags -> [Vrtx] -> FCode ()
+unscramble dflags vertices = mapM_ do_component components
where
edges :: [ (Vrtx, Key, [Key]) ]
edges = [ (vertex, key1, edges_from stmt1)
@@ -432,7 +434,7 @@ unscramble vertices = mapM_ do_component components
u <- newUnique
let (to_tmp, from_tmp) = split dflags u first_stmt
mk_graph to_tmp
- unscramble rest
+ unscramble dflags rest
mk_graph from_tmp
split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt)
@@ -445,8 +447,8 @@ unscramble vertices = mapM_ do_component components
mk_graph :: Stmt -> FCode ()
mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs
-mustFollow :: Stmt -> Stmt -> Bool
-(reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs
+ mustFollow :: Stmt -> Stmt -> Bool
+ (reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs
-------------------------------------------------------------------------
-- mkSwitch
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
index 3eeb697604..b7090275ab 100644
--- a/includes/stg/MachRegs.h
+++ b/includes/stg/MachRegs.h
@@ -170,6 +170,12 @@
#define REG_R6 r9
#define REG_SpLim r15
+/*
+Map both Fn and Dn to register xmmn so that we can pass a function any
+combination of up to six Float# or Double# arguments without touching
+the stack. See Note [Overlapping global registers] for implications.
+*/
+
#define REG_F1 xmm1
#define REG_F2 xmm2
#define REG_F3 xmm3
diff --git a/testsuite/.gitignore b/testsuite/.gitignore
index 2623c60f46..b1ed88791c 100644
--- a/testsuite/.gitignore
+++ b/testsuite/.gitignore
@@ -165,6 +165,8 @@ mk/ghcconfig*_inplace_bin_ghc-stage2.exe.mk
/tests/codeGen/should_run/SizeOfSmallArray
/tests/codeGen/should_run/StaticArraySize
/tests/codeGen/should_run/StaticByteArraySize
+/tests/codeGen/should_run/T10521
+/tests/codeGen/should_run/T10521b
/tests/codeGen/should_run/T1852
/tests/codeGen/should_run/T1861
/tests/codeGen/should_run/T2080
diff --git a/testsuite/tests/codeGen/should_run/T10521.hs b/testsuite/tests/codeGen/should_run/T10521.hs
new file mode 100644
index 0000000000..e770ba315c
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T10521.hs
@@ -0,0 +1,11 @@
+import Data.Word( Word8 )
+
+toV :: Float -> Word8
+toV d = let coeff = significand d * 255.9999 / d
+ a = truncate $ d * coeff
+ b = exponent d
+ in a `seq` (b `seq` a)
+
+main :: IO ()
+main =
+ print $ map toV [ 3.56158e-2, 0.7415215, 0.5383201, 0.1289829, 0.45520145 ]
diff --git a/testsuite/tests/codeGen/should_run/T10521.stdout b/testsuite/tests/codeGen/should_run/T10521.stdout
new file mode 100644
index 0000000000..9843a1725d
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T10521.stdout
@@ -0,0 +1 @@
+[145,189,137,132,233]
diff --git a/testsuite/tests/codeGen/should_run/T10521b.hs b/testsuite/tests/codeGen/should_run/T10521b.hs
new file mode 100644
index 0000000000..d0433f9b76
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T10521b.hs
@@ -0,0 +1,18 @@
+{-# LANGUAGE MagicHash #-}
+
+import GHC.Exts
+
+f :: Float# -> Float#
+f x = x
+{-# NOINLINE f #-}
+
+g :: Double# -> Double#
+g x = x
+{-# NOINLINE g #-}
+
+h :: Float -> Float
+h (F# x) = let a = F# (f x)
+ b = D# (g (2.0##))
+ in a `seq` (b `seq` a)
+
+main = print (h 1.0)
diff --git a/testsuite/tests/codeGen/should_run/T10521b.stdout b/testsuite/tests/codeGen/should_run/T10521b.stdout
new file mode 100644
index 0000000000..d3827e75a5
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T10521b.stdout
@@ -0,0 +1 @@
+1.0
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 12418f0aad..b2970a2eba 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -129,3 +129,5 @@ test('T9013', omit_ways(['ghci']), # ghci doesn't support unboxed tuples
compile_and_run, [''])
test('T9340', normal, compile_and_run, [''])
test('cgrun074', normal, compile_and_run, [''])
+test('T10521', normal, compile_and_run, [''])
+test('T10521b', normal, compile_and_run, [''])