From f192e623f579e09b7b5442cc707a40482b76e81e Mon Sep 17 00:00:00 2001 From: Sylvain Henry Date: Fri, 14 May 2021 17:31:38 +0200 Subject: Cmm: fix sinking after suspendThread Suppose a safe call: myCall(x,y,z) It is lowered into three unsafe calls in Cmm: r = suspendThread(...); myCall(x,y,z); resumeThread(r); Consider the following situation for myCall arguments: x = Sp[..] -- stack y = Hp[..] -- heap z = R1 -- global register r = suspendThread(...); myCall(x,y,z); resumeThread(r); The sink pass assumes that unsafe calls clobber memory (heap and stack), hence x and y assignments are not sunk after `suspendThread`. The sink pass also correctly handles global register clobbering for all unsafe calls, except `suspendThread`! `suspendThread` is special because it releases the capability the thread is running on. Hence the sink pass must also take into account global registers that are mapped into memory (in the capability). In the example above, we could get: r = suspendThread(...); z = R1 myCall(x,y,z); resumeThread(r); But this transformation isn't valid if R1 is (BaseReg->rR1) as BaseReg is invalid between suspendThread and resumeThread. This caused argument corruption at least with the C backend ("unregisterised") in #19237. Fix #19237 --- compiler/GHC/Cmm/LayoutStack.hs | 15 ++------------- compiler/GHC/Cmm/MachOp.hs | 19 ++++++++++++++----- compiler/GHC/Cmm/Parser.y | 3 +++ compiler/GHC/Cmm/Sink.hs | 14 ++++++++++++-- compiler/GHC/CmmToAsm/PPC/CodeGen.hs | 3 +++ compiler/GHC/CmmToAsm/SPARC/CodeGen.hs | 3 +++ compiler/GHC/CmmToAsm/X86/CodeGen.hs | 3 +++ compiler/GHC/CmmToC.hs | 11 ++++++++++- compiler/GHC/CmmToLlvm/CodeGen.hs | 3 +++ testsuite/tests/ffi/should_run/T19237.hs | 26 ++++++++++++++++++++++++++ testsuite/tests/ffi/should_run/T19237_c.c | 9 +++++++++ testsuite/tests/ffi/should_run/all.T | 2 ++ 12 files changed, 90 insertions(+), 21 deletions(-) create mode 100644 testsuite/tests/ffi/should_run/T19237.hs create mode 100644 testsuite/tests/ffi/should_run/T19237_c.c diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs index 2fdbb1fe5a..b996427bba 100644 --- a/compiler/GHC/Cmm/LayoutStack.hs +++ b/compiler/GHC/Cmm/LayoutStack.hs @@ -12,14 +12,11 @@ import GHC.StgToCmm.Monad ( newTemp ) -- XXX layering violation import GHC.StgToCmm.Utils ( callerSaveVolatileRegs ) -- XXX layering violation import GHC.StgToCmm.Foreign ( saveThreadState, loadThreadState ) -- XXX layering violation -import GHC.Types.Basic import GHC.Cmm import GHC.Cmm.Info import GHC.Cmm.BlockId -import GHC.Cmm.CLabel import GHC.Cmm.Utils import GHC.Cmm.Graph -import GHC.Types.ForeignCall import GHC.Cmm.Liveness import GHC.Cmm.ProcPoint import GHC.Runtime.Heap.Layout @@ -34,7 +31,6 @@ import GHC.Types.Unique.FM import GHC.Utils.Misc import GHC.Driver.Session -import GHC.Data.FastString import GHC.Utils.Outputable hiding ( isEmpty ) import GHC.Utils.Panic import qualified Data.Set as Set @@ -1190,21 +1186,14 @@ lowerSafeForeignCall profile block | otherwise = return block -foreignLbl :: FastString -> CmmExpr -foreignLbl name = CmmLit (CmmLabel (mkForeignLabel name Nothing ForeignLabelInExternalPackage IsFunction)) - callSuspendThread :: Platform -> LocalReg -> Bool -> CmmNode O O callSuspendThread platform id intrbl = - CmmUnsafeForeignCall - (ForeignTarget (foreignLbl (fsLit "suspendThread")) - (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn)) + CmmUnsafeForeignCall (PrimTarget MO_SuspendThread) [id] [baseExpr, mkIntExpr platform (fromEnum intrbl)] callResumeThread :: LocalReg -> LocalReg -> CmmNode O O callResumeThread new_base id = - CmmUnsafeForeignCall - (ForeignTarget (foreignLbl (fsLit "resumeThread")) - (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn)) + CmmUnsafeForeignCall (PrimTarget MO_ResumeThread) [new_base] [CmmReg (CmmLocal id)] -- ----------------------------------------------------------------------------- diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs index 558cb13a7e..b91263ce47 100644 --- a/compiler/GHC/Cmm/MachOp.hs +++ b/compiler/GHC/Cmm/MachOp.hs @@ -638,6 +638,12 @@ data CallishMachOp -- Should be an AtomicRMW variant eventually. -- Sequential consistent. | MO_Xchg Width + + -- These rts provided functions are special: suspendThread releases the + -- capability, hence we mustn't sink any use of data stored in the capability + -- after this instruction. + | MO_SuspendThread + | MO_ResumeThread deriving (Eq, Show) -- | The operation to perform atomically. @@ -653,13 +659,16 @@ data AtomicMachOp = pprCallishMachOp :: CallishMachOp -> SDoc pprCallishMachOp mo = text (show mo) +-- | Return (results_hints,args_hints) callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint]) callishMachOpHints op = case op of - MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint]) - MO_Memset _ -> ([], [AddrHint,NoHint,NoHint]) - MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint]) - MO_Memcmp _ -> ([], [AddrHint, AddrHint, NoHint]) - _ -> ([],[]) + MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint]) + MO_Memset _ -> ([], [AddrHint,NoHint,NoHint]) + MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint]) + MO_Memcmp _ -> ([], [AddrHint, AddrHint, NoHint]) + MO_SuspendThread -> ([AddrHint], [AddrHint,NoHint]) + MO_ResumeThread -> ([AddrHint], [AddrHint]) + _ -> ([],[]) -- empty lists indicate NoHint -- | The alignment of a 'memcpy'-ish operation. diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index 4576eb9b38..a83feff8cf 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -1027,6 +1027,9 @@ callishMachOps platform = listToUFM $ ( "memmove", memcpyLikeTweakArgs MO_Memmove ), ( "memcmp", memcpyLikeTweakArgs MO_Memcmp ), + ( "suspendThread", (MO_SuspendThread,) ), + ( "resumeThread", (MO_ResumeThread,) ), + ("prefetch0", (MO_Prefetch_Data 0,)), ("prefetch1", (MO_Prefetch_Data 1,)), ("prefetch2", (MO_Prefetch_Data 2,)), diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs index cd13d6b655..3ef58b3648 100644 --- a/compiler/GHC/Cmm/Sink.hs +++ b/compiler/GHC/Cmm/Sink.hs @@ -622,10 +622,16 @@ conflicts platform (r, rhs, addr) node -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap] | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem = True - -- (6) native calls clobber any memory + -- (6) suspendThread clobbers every global register not backed by a real + -- register. It also clobbers heap and stack but this is handled by (5) + | CmmUnsafeForeignCall (PrimTarget MO_SuspendThread) _ _ <- node + , foldRegsUsed platform (\b g -> globalRegMaybe platform g == Nothing || b) False rhs + = True + + -- (7) native calls clobber any memory | CmmCall{} <- node, memConflicts addr AnyMem = True - -- (7) otherwise, no conflict + -- (8) otherwise, no conflict | otherwise = False {- Note [Inlining foldRegsDefd] @@ -759,6 +765,10 @@ data AbsMem -- Some CallishMachOp imply a memory barrier e.g. AtomicRMW and -- therefore we should never float any memory operations across one of -- these calls. +-- +-- `suspendThread` releases the capability used by the thread, hence we mustn't +-- float accesses to heap, stack or virtual global registers stored in the +-- capability (e.g. with unregisterised build, see #19237). bothMems :: AbsMem -> AbsMem -> AbsMem diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs index c3e66c02ac..4be45098be 100644 --- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs @@ -2012,6 +2012,9 @@ genCCall' config gcp target dest_regs args MO_Memmove _ -> (fsLit "memmove", False) MO_Memcmp _ -> (fsLit "memcmp", False) + MO_SuspendThread -> (fsLit "suspendThread", False) + MO_ResumeThread -> (fsLit "resumeThread", False) + MO_BSwap w -> (bSwapLabel w, False) MO_BRev w -> (bRevLabel w, False) MO_PopCnt w -> (popCntLabel w, False) diff --git a/compiler/GHC/CmmToAsm/SPARC/CodeGen.hs b/compiler/GHC/CmmToAsm/SPARC/CodeGen.hs index 974aec02c2..0a5152f425 100644 --- a/compiler/GHC/CmmToAsm/SPARC/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/SPARC/CodeGen.hs @@ -665,6 +665,9 @@ outOfLineMachOp_table mop MO_Memmove _ -> fsLit "memmove" MO_Memcmp _ -> fsLit "memcmp" + MO_SuspendThread -> fsLit "suspendThread" + MO_ResumeThread -> fsLit "resumeThread" + MO_BSwap w -> bSwapLabel w MO_BRev w -> bRevLabel w MO_PopCnt w -> popCntLabel w diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs index 7f9b842c1b..8da259e73b 100644 --- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs @@ -3395,6 +3395,9 @@ outOfLineCmmOp bid mop res args MO_Memmove _ -> fsLit "memmove" MO_Memcmp _ -> fsLit "memcmp" + MO_SuspendThread -> fsLit "suspendThread" + MO_ResumeThread -> fsLit "resumeThread" + MO_PopCnt _ -> fsLit "popcnt" MO_BSwap _ -> fsLit "bswap" {- Here the C implementation is used as there is no x86 diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs index d5457d4fae..ae6f4b91b6 100644 --- a/compiler/GHC/CmmToC.hs +++ b/compiler/GHC/CmmToC.hs @@ -268,12 +268,18 @@ pprStmt platform stmt = hresults = zip results res_hints hargs = zip args arg_hints + need_cdecl + | Just _align <- machOpMemcpyishAlign op = True + | MO_ResumeThread <- op = True + | MO_SuspendThread <- op = True + | otherwise = False + fn_call -- The mem primops carry an extra alignment arg. -- We could maybe emit an alignment directive using this info. -- We also need to cast mem primops to prevent conflicts with GCC -- builtins (see bug #5967). - | Just _align <- machOpMemcpyishAlign op + | need_cdecl = (text ";EFF_(" <> fn <> char ')' <> semi) $$ pprForeignCall platform fn cconv hresults hargs | otherwise @@ -825,6 +831,9 @@ pprCallishMachOp_for_C mop MO_Memmove _ -> text "memmove" MO_Memcmp _ -> text "memcmp" + MO_SuspendThread -> text "suspendThread" + MO_ResumeThread -> text "resumeThread" + MO_BSwap w -> ftext (bSwapLabel w) MO_BRev w -> ftext (bRevLabel w) MO_PopCnt w -> ftext (popCntLabel w) diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs index 5ccadae4fa..bfeb39171d 100644 --- a/compiler/GHC/CmmToLlvm/CodeGen.hs +++ b/compiler/GHC/CmmToLlvm/CodeGen.hs @@ -862,6 +862,9 @@ cmmPrimOpFunctions mop = do MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2 MO_Memcmp _ -> fsLit $ "memcmp" + MO_SuspendThread -> fsLit $ "suspendThread" + MO_ResumeThread -> fsLit $ "resumeThread" + (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w) (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w) (MO_BRev w) -> fsLit $ "llvm.bitreverse." ++ showSDoc dflags (ppr $ widthToLlvmInt w) diff --git a/testsuite/tests/ffi/should_run/T19237.hs b/testsuite/tests/ffi/should_run/T19237.hs new file mode 100644 index 0000000000..4080067d01 --- /dev/null +++ b/testsuite/tests/ffi/should_run/T19237.hs @@ -0,0 +1,26 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# OPTIONS_GHC -dno-typeable-binds -O #-} + +module Main where + +import Control.Monad +import GHC.Exts +import Control.Concurrent + +foreign import ccall expect_999 :: Int# -> IO () + +main :: IO () +main = do + _ <- forkIO $ forever $ putStr "" + replicateM_ 100000 (baz (# #)) + +{-# NOINLINE baz #-} +baz :: (# #) -> IO () +baz c = expect_999 (bar c) + +{-# NOINLINE bar #-} +bar :: (# #) -> Int# +bar (# #) = 999# diff --git a/testsuite/tests/ffi/should_run/T19237_c.c b/testsuite/tests/ffi/should_run/T19237_c.c new file mode 100644 index 0000000000..a052d72113 --- /dev/null +++ b/testsuite/tests/ffi/should_run/T19237_c.c @@ -0,0 +1,9 @@ +#include +#include + +void expect_999(int p) { + if (p != 999) { + printf("Error: received %d\n",p); + exit(1); + } +} diff --git a/testsuite/tests/ffi/should_run/all.T b/testsuite/tests/ffi/should_run/all.T index 3116946d29..f4950cf7ca 100644 --- a/testsuite/tests/ffi/should_run/all.T +++ b/testsuite/tests/ffi/should_run/all.T @@ -229,3 +229,5 @@ test('IncallAffinity', when(unregisterised(), skip)], compile_and_run, ['IncallAffinity_c.c -no-hs-main']) + +test('T19237', normal, compile_and_run, ['T19237_c.c']) -- cgit v1.2.1