summaryrefslogtreecommitdiff
path: root/compiler/codeGen/StgCmmForeign.hs
diff options
context:
space:
mode:
authorAndrew Martin <andrew.thaddeus@gmail.com>2019-05-25 15:36:14 -0400
committerMarge Bot <ben+marge-bot@smart-cactus.org>2019-06-04 01:09:43 -0400
commitdb78ac6f5d69618ff143ab4b572e7f58a1805687 (patch)
treebfff3f99e6710e1a5cad691d3bf6fba42d1d3419 /compiler/codeGen/StgCmmForeign.hs
parent286827be471f9efa67303d57b979e0c32cb8936e (diff)
downloadhaskell-db78ac6f5d69618ff143ab4b572e7f58a1805687.tar.gz
Use a better strategy for determining the offset applied to foreign function arguments that have an unlifted boxed type. We used to use the type of the argument. We now use the type of the foreign function. Add a test to confirm that the roundtrip conversion between an unlifted boxed type and Any is sound in the presence of a foreign function call.
Diffstat (limited to 'compiler/codeGen/StgCmmForeign.hs')
-rw-r--r--compiler/codeGen/StgCmmForeign.hs166
1 files changed, 128 insertions, 38 deletions
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index c1103e7d77..45e5733fc1 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -34,7 +34,6 @@ import CmmUtils
import MkGraph
import Type
import RepType
-import TysPrim
import CLabel
import SMRep
import ForeignCall
@@ -44,20 +43,26 @@ import Outputable
import UniqSupply
import BasicTypes
+import TyCoRep
+import TysPrim
+import Util (zipEqual)
+
import Control.Monad
-----------------------------------------------------------------------------
-- Code generation for Foreign Calls
-----------------------------------------------------------------------------
--- | emit code for a foreign call, and return the results to the sequel.
---
+-- | Emit code for a foreign call, and return the results to the sequel.
+-- Precondition: the length of the arguments list is the same as the
+-- arity of the foreign function.
cgForeignCall :: ForeignCall -- the op
+ -> Type -- type of foreign function
-> [StgArg] -- x,y arguments
-> Type -- result type
-> FCode ReturnKind
-cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
+cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
= do { dflags <- getDynFlags
; let -- in the stdcall calling convention, the symbol needs @size appended
-- to it, where size is the total number of bytes of arguments. We
@@ -70,7 +75,7 @@ cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
-- ToDo: this might not be correct for 64-bit API
arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
(wORD_SIZE dflags)
- ; cmm_args <- getFCallArgs stg_args
+ ; cmm_args <- getFCallArgs stg_args typ
; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
; let ((call_args, arg_hints), cmm_target)
= case target of
@@ -492,43 +497,128 @@ stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
closureField :: DynFlags -> ByteOff -> ByteOff
closureField dflags off = off + fixedHdrSize dflags
--- -----------------------------------------------------------------------------
+-- Note [Unlifted boxed arguments to foreign calls]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
-- For certain types passed to foreign calls, we adjust the actual
--- value passed to the call. For ByteArray#/Array# we pass the
--- address of the actual array, not the address of the heap object.
-
-getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)]
+-- value passed to the call. For ByteArray#, Array#, SmallArray#,
+-- and ArrayArray#, we pass the address of the array's payload, not
+-- the address of the heap object. For example, consider
+-- foreign import "c_foo" foo :: ByteArray# -> Int# -> IO ()
+-- At a Haskell call like `foo x y`, we'll generate a C call that
+-- is more like
+-- c_foo( x+8, y )
+-- where the "+8" takes the heap pointer (x :: ByteArray#) and moves
+-- it past the header words of the ByteArray object to point directly
+-- to the data inside the ByteArray#. (The exact offset depends
+-- on the target architecture and on profiling) By contrast, (y :: Int#)
+-- requires no such adjustment.
+--
+-- This adjustment is performed by 'add_shim'. The size of the
+-- adjustment depends on the type of heap object. But
+-- how can we determine that type? There are two available options.
+-- We could use the types of the actual values that the foreign call
+-- has been applied to, or we could use the types present in the
+-- foreign function's type. Prior to GHC 8.10, we used the former
+-- strategy since it's a little more simple. However, in issue #16650
+-- and more compellingly in the comments of
+-- https://gitlab.haskell.org/ghc/ghc/merge_requests/939, it was
+-- demonstrated that this leads to bad behavior in the presence
+-- of unsafeCoerce#. Returning to the above example, suppose the
+-- Haskell call looked like
+-- foo (unsafeCoerce# p)
+-- where the types of expressions comprising the arguments are
+-- p :: (Any :: TYPE 'UnliftedRep)
+-- i :: Int#
+-- so that the unsafe-coerce is between Any and ByteArray#.
+-- These two types have the same kind (they are both represented by
+-- a heap pointer) so no GC errors will occur if we do this unsafe coerce.
+-- By the time this gets to the code generator the cast has been
+-- discarded so we have
+-- foo p y
+-- But we *must* adjust the pointer to p by a ByteArray# shim,
+-- *not* by an Any shim (the Any shim involves no offset at all).
+--
+-- To avoid this bad behavior, we adopt the second strategy: use
+-- the types present in the foreign function's type.
+-- In collectStgFArgTypes, we convert the foreign function's
+-- type to a list of StgFArgType. Then, in add_shim, we interpret
+-- these as numeric offsets.
+
+getFCallArgs ::
+ [StgArg]
+ -> Type -- the type of the foreign function
+ -> FCode [(CmmExpr, ForeignHint)]
-- (a) Drop void args
-- (b) Add foreign-call shim code
-- It's (b) that makes this differ from getNonVoidArgAmodes
-
-getFCallArgs args
- = do { mb_cmms <- mapM get args
+-- Precondition: args and typs have the same length
+-- See Note [Unlifted boxed arguments to foreign calls]
+getFCallArgs args typ
+ = do { mb_cmms <- mapM get (zipEqual "getFCallArgs" args (collectStgFArgTypes typ))
; return (catMaybes mb_cmms) }
where
- get arg | null arg_reps
- = return Nothing
- | otherwise
- = do { cmm <- getArgAmode (NonVoid arg)
- ; dflags <- getDynFlags
- ; return (Just (add_shim dflags arg_ty cmm, hint)) }
- where
- arg_ty = stgArgType arg
- arg_reps = typePrimRep arg_ty
- hint = typeForeignHint arg_ty
-
-add_shim :: DynFlags -> Type -> CmmExpr -> CmmExpr
-add_shim dflags arg_ty expr
- | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
- = cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
-
- | tycon == smallArrayPrimTyCon || tycon == smallMutableArrayPrimTyCon
- = cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
-
- | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
- = cmmOffsetB dflags expr (arrWordsHdrSize dflags)
-
- | otherwise = expr
+ get (arg,typ)
+ | null arg_reps
+ = return Nothing
+ | otherwise
+ = do { cmm <- getArgAmode (NonVoid arg)
+ ; dflags <- getDynFlags
+ ; return (Just (add_shim dflags typ cmm, hint)) }
+ where
+ arg_ty = stgArgType arg
+ arg_reps = typePrimRep arg_ty
+ hint = typeForeignHint arg_ty
+
+-- The minimum amount of information needed to determine
+-- the offset to apply to an argument to a foreign call.
+-- See Note [Unlifted boxed arguments to foreign calls]
+data StgFArgType
+ = StgPlainType
+ | StgArrayType
+ | StgSmallArrayType
+ | StgByteArrayType
+
+-- See Note [Unlifted boxed arguments to foreign calls]
+add_shim :: DynFlags -> StgFArgType -> CmmExpr -> CmmExpr
+add_shim dflags ty expr = case ty of
+ StgPlainType -> expr
+ StgArrayType -> cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
+ StgSmallArrayType -> cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
+ StgByteArrayType -> cmmOffsetB dflags expr (arrWordsHdrSize dflags)
+
+-- From a function, extract information needed to determine
+-- the offset of each argument when used as a C FFI argument.
+-- See Note [Unlifted boxed arguments to foreign calls]
+collectStgFArgTypes :: Type -> [StgFArgType]
+collectStgFArgTypes = go []
+ where
+ -- Skip foralls
+ go bs (ForAllTy _ res) = go bs res
+ go bs (AppTy{}) = reverse bs
+ go bs (TyConApp{}) = reverse bs
+ go bs (LitTy{}) = reverse bs
+ go bs (TyVarTy{}) = reverse bs
+ go _ (CastTy{}) = panic "myCollectTypeArgs: CastTy"
+ go _ (CoercionTy{}) = panic "myCollectTypeArgs: CoercionTy"
+ go bs (FunTy {ft_arg = arg, ft_res=res}) =
+ go (typeToStgFArgType arg:bs) res
+
+-- Choose the offset based on the type. For anything other
+-- than an unlifted boxed type, there is no offset.
+-- See Note [Unlifted boxed arguments to foreign calls]
+typeToStgFArgType :: Type -> StgFArgType
+typeToStgFArgType typ
+ | tycon == arrayPrimTyCon = StgArrayType
+ | tycon == mutableArrayPrimTyCon = StgArrayType
+ | tycon == arrayArrayPrimTyCon = StgArrayType
+ | tycon == mutableArrayArrayPrimTyCon = StgArrayType
+ | tycon == smallArrayPrimTyCon = StgSmallArrayType
+ | tycon == smallMutableArrayPrimTyCon = StgSmallArrayType
+ | tycon == byteArrayPrimTyCon = StgByteArrayType
+ | tycon == mutableByteArrayPrimTyCon = StgByteArrayType
+ | otherwise = StgPlainType
where
- tycon = tyConAppTyCon (unwrapType arg_ty)
- -- should be a tycon app, since this is a foreign call
+ -- should be a tycon app, since this is a foreign call
+ tycon = tyConAppTyCon (unwrapType typ)
+